%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% PACKAGES 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

	\documentclass[11pt, a4paper]{article}
	%\documentclass[10pt,a4paper,conference]{article}

% DOCUMENT FORMAT  %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

	\usepackage[a4paper, portrait, margin=1in]{geometry}	% Package to control margins
	%\usepackage{fullpage} 						% Package to use full page
	\usepackage{parskip} 						% Package to tweak paragraph skipping
	\usepackage[hang,flushmargin]{footmisc} 	% Get rid of footnote indentation
	\usepackage{comment}
	
	\usepackage{siunitx}
	
	\usepackage{pdflscape}
	\usepackage{cancel}
%	\usepackage{figcaps}
	
	\usepackage{multicol} 						% Multicolumns for the bibliography
	
	\usepackage{makecell}
	\usepackage{enumerate}

% IMAGES AND TABLES  %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

	\usepackage{subcaption} 		% Allows for subfigures with subcaptions 
	\usepackage{graphicx} 			% Allows for changing size of pictures etc.
	\usepackage{rotating}			% Allows sideways images
	%\usepackage{graphics} 			% for pdf, bitmapped graphics files
	%\usepackage{epsfig} 			% for postscript graphics files
	
	\usepackage{threeparttable}		% For notes on a table
	\usepackage{footnote} 			% Allows footnotes within tables
	\usepackage{booktabs} 			% Allows the use of \toprule, \midrule etc.
	\usepackage{multirow}			% Allows multirow cells in tables
	
	\usepackage[table]{xcolor}% http://ctan.org/pkg/xcolor, allows colouring in tables
%	\usepackage[section]{placeins}

	\usepackage{pdfpages}
	
	% At the start of every tablenotes environment: set the line stretch to 1.1, then switch to \footnotesize.
	% NOTE(review): \setstretch is a setspace command and setspace is only loaded further down the preamble;
	% this is fine provided the hook first fires after that point -- confirm.
	% NOTE(review): \AtBeginEnvironment presumably comes from etoolbox or a recent LaTeX kernel -- confirm.
	\AtBeginEnvironment{tablenotes}{\setstretch{1.1}\footnotesize}
	
	
	\usepackage{listings}
	
	% Defaults for all code listings: \scriptsize monospaced text on a very light grey
	% background, framed, with long lines wrapped and compact spacing.
	\lstset{%
		basicstyle=\scriptsize\ttfamily,
		backgroundcolor=\color{gray!05},
		frame=single,
		breaklines=true,
		basewidth={0.4em,0.4em},	% narrow character cells
		lineskip=-2pt,			% negative skip pulls lines closer together
%		xleftmargin=2em,		% (disabled) left margin
%		xrightmargin=2em		% (disabled) right margin
	}

	
%	\usepackage{setspace}
	
	% Define \bibfont as \footnotesize.
	% NOTE(review): presumably read by natbib (loaded below) to set the font of the reference list -- confirm.
	\def\bibfont{\footnotesize}

	
	


% BIB %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

	% Citations and references: natbib provides \citep/\citet/\citealp, apacite provides APA formatting.
	% natbibapa must be given as a package option. Written after the package name it falls into
	% \usepackage's optional release-date argument and is never applied.
	% natbib is loaded first so its options are already set when apacite's natbibapa mode requests it.
	\usepackage[authoryear, round]{natbib}
	\usepackage[natbibapa]{apacite}
	\bibliographystyle{apacite}
	
%	\setlength{\bibsep}{2pt plus 0.3ex} % Set spacing between bib items

% FONTS %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

	%\usepackage{times} 			% Times New Roman
	%\usepackage{mathptmx} 			% Times New Roman with Maths support
	\usepackage[sc]{mathpazo} 		% Use the Palatino font
	\linespread{1.2} 				% Line spacing - Palatino needs more space between lines
	\usepackage{microtype} 			% Slightly tweak font spacing for aesthetics
	%\setlength{\parskip}{1em}		% Between paragraph spacing
	%\setlength{\parindent}{1.5em}
	
	\usepackage{setspace}
	
	\usepackage[shortlabels]{enumitem} 			% Change format of lists

% SECTION TITLES %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

	\usepackage{titlesec}			% Package for changing section headers
	
	% Section headings: bold, \large; 1em between number and title.
	\titleformat{\section}{\bfseries\large}{\thesection}{1em}{}
	\titlespacing*{\section}{0pc}{0.7pc}{0.3pc}

	% Subsection headings: bold italic at normal size.
	\titleformat{\subsection}{\normalfont\itshape\bfseries}{\thesubsection}{1em}{}
	\titlespacing*{\subsection}{0pc}{1pc}{0.2pc}

	% Subsubsection headings: italic at normal size.
	\titleformat{\subsubsection}{\normalfont\itshape}{\thesubsubsection}{1em}{}
	\titlespacing*{\subsubsection}{0pc}{1pc}{0.1pc}
	
	\usepackage[labelfont={bf},textfont=it]{caption}  % Change table titles
	
		% APPENDIX TITLE PAGE
	\usepackage[toc, page]{appendix}

% HEADERS & FOOTERS %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
	
	%\usepackage{fancyhdr} 			% Headers and footers
	%\pagestyle{fancy} 				% All pages have headers and footers
	%\fancyhead{} 					% Blank out the default header
	%\fancyfoot{} 					% Blank out the default footer
	%\fancyhead[C]{Running title $\bullet$ May 2016 $\bullet$ Vol. XXI, No. 1} % Custom header text
	%\fancyfoot[RO,LE]{\thepage} 	% Custom footer text
	
	\interfootnotelinepenalty=10000 % Maximum penalty for a break inside a footnote, so footnotes are not split across pages


% MATHS %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

	\usepackage{amsmath} 		% Maths package
	\usepackage{amssymb}  		% Maths symbols package
	\usepackage{eurosym} 		% Allows you to use the Euro symbol
	
	\usepackage{bbm}			% Allows use of indicator
	% Indicator function: \1{A} typesets a blackboard-bold 1 followed by A inside auto-sized curly braces.
	\newcommand{\1}[1]{%
		\mathbbm{1}\left\{#1\right\}%
	}

% HYPERLINKS %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

	\usepackage{hyperref}		% Install package
	
		% Custom colours used by the link styling below.
		\definecolor{lightblue}{HTML}{b8e7f7} % Define lightblue
	\definecolor{lightred}{HTML}{ff6961} % Define lightred
% Standard setup - all blue (disabled alternative)
%	\hypersetup{
%		colorlinks=true,
%		linkcolor=blue,
%		filecolor=magenta, 
%		citecolor=blue,        	% Color of links to bibliography     
%		urlcolor=blue,			% Color of links to web
%	}

	% Vibey setup - coloured link text plus a thick underline-style border
	% See: https://tex.stackexchange.com/questions/26071/how-can-i-have-colored-and-underlined-links-with-hyperref
	% NOTE(review): pdfborder/pdfborderstyle are set after colorlinks=true, presumably so that
	% the underline is kept even though the link text is coloured -- confirm.
	\hypersetup{%
		%colorlinks=false,% hyperlinks will be black
		colorlinks=true,
		citebordercolor=lightblue,
%		urlbordercolor={1 1 1},
%		urlbordercolor={1 1 0},
		urlbordercolor=lightblue,
		linkbordercolor=white,
		pdfborder={0 0 2},
		pdfborderstyle={/S/U/W 2},% border style: underline (/S/U) of width 2 (/W 2)
		linkcolor=lightred,
		filecolor=magenta,    
		urlcolor=blue,			% Color of links to web
		citecolor = black
	}
	
	% \speciallink{<url>}{<text>}: a hyperlink typeset without the underline border used for
	% other links. The border settings are changed inside a group, so they revert after the link.
	% Line ends are protected with % so the macro adds no stray spaces around the link.
	\newcommand{\speciallink}[2]{%
		\begingroup
		% Clear the border style as well as the border width: PDF viewers ignore /Border
		% when a /BS (border style) entry is present, so pdfborder alone may leave the underline.
		\hypersetup{pdfborder={0 0 0}, pdfborderstyle={}}%
		\href{#1}{#2}%
		\endgroup
	}
	
	\makeatletter
%	\Hy@AtBeginDocument{%
%		\def\@pdfborder{0 0 2}% Overrides border definition set with colorlinks=true
%		\def\@pdfborderstyle{/S/U/W 2}% Overrides border style set with colorlinks=true
%		% Hyperlink border style will be underline of width 1pt
%	}
	
	
	
	\makeatother
	
	




%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% TITLE
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

	
	
	
	% Title-page metadata consumed by \maketitle.
	\title{Silence to Solidarity: How Communication About a Minority Affects Discrimination
		%\Large {\color{blue}Preliminary Draft} 
	}
	
	% The trailing * is a literal marker; the matching "*" affiliation/acknowledgment note
	% is typeset by hand after the abstract rather than through \thanks.
	\author{Duncan Webb*}

	% Compilation date, in italics.
	\date{\textit{\today}}
	
%	\thanks{I thank my supervisors, Karen Macours and Suanna Oh, for their fantastic support and advice. I also thank David Atkin, Abhijit Banerjee, Esther Duflo,  Supreet Kaur, Eliana La Ferrara, Edward Miguel, Frank Schilbach, Gautam Rao, and many seminar participants at the behavioral and Development lunches at PSE, MIT and Berkeley, for helpful comments and feedback. This work has been generously supported by JPAL's Crime and Violence Initiative, EUR-PgSE, and CEPREMAP.}}


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% DOCUMENT
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{document}
	
	
	
\maketitle
\thispagestyle{empty}
%\pagestyle{empty}

\begin{center}
	
	
%	\textbf{PRELIMINARY DRAFT: PLEASE DO NOT SHARE}
\end{center}

\vspace{-3em}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{center}
%\vspace{-0.5em}	
%Job Market Paper



\vspace{-0.5em}	
\speciallink{https://drive.google.com/file/d/1EVdCIgSKs9Vk6x8sv2FHhSTItWJsLrKF/view}{Download latest version}
%	\textbf{PRELIMINARY DRAFT: PLEASE DO NOT SHARE}
\vspace{0.8em}
\end{center}


\begin{abstract}
	\linespread{1.25}\selectfont
	\vspace{-0.8em}	
	
	
%	Discrimination is often conceived of as an individual decision that is hard to change
%Individual-level discrimination is often attributed to deep-seated prejudice that is difficult to change. But at the societal level, we sometimes observe rapid reductions in discriminatory preferences, suggesting that social interactions and the communication they entail might drive such shifts. I examine whether discrimination can be reduced by two types of communication about a minority: (i) \textit{horizontal communication} between majority-group members, or (ii) \textit{top-down communication} from agents of authority (e.g., the legal system). I run a field experiment in urban India ($N$=3,397) that measures discrimination against a  marginalized community of transgender people. Non-transgender participants are highly discriminatory: in a control condition, they sacrifice 1.9x their daily food expenditure to avoid hiring a transgender worker to deliver groceries to their home. But horizontal communication between  participants sharply reduces discrimination: participants who were earlier involved in a group discussion with two of their neighbors no longer discriminate on average, even when making private post-discussion choices. This effect is 1.7x larger than the effect of top-down communication that informs participants about the legal rights of transgender people. The discussion's effects are not driven by virtue signaling or correcting a misperceived norm. Instead, participants appear to \textit{persuade} each other to be more pro-trans, partly because pro-trans participants are the most vocal in discussions.

%The rapid shifts in discrimination seen in some historical episodes are often attributed to changes in social contact between a minority and a majority. But in a world 



%Individual-level discrimination is often attributed to deep-seated prejudice that is difficult to change. But at the societal level, we sometimes observe rapid reductions in discriminatory preferences, suggesting that social interactions and the communication they entail might drive such shifts.

%Discrimination is often attributed to deep-seated prejudice, but some historical episodes involve rapid shifts in discriminatory preferences. 

%Individual-level discrimination is often attributed to deep-seated prejudice, but sometimes we observe rapid societal-


%Discrimination is often 


% 100 word version


%Rewrite this abstract so that it is 100 words or less

%If people mostly interact within their identity group, can horizontal communication between majority-group members change behavior towards minorities? In a field experiment in India ($N$=3,397), non-transgender participants strongly discriminate against transgender workers when hiring for a grocery delivery. But a discussion with two non-transgender neighbors eliminates this discrimination. The discussion is about 1.7x more effective at reducing discrimination than information about transgender people's legal rights, and its effects appear to be driven by norm-based persuasion. The effect appears driven by norm-based persuasion: pro-transgender participants speak up more during discussions, creating a perception of anti-discriminatory norms that influences others' subsequent private choices.

People predominantly talk within their identity groups. Conversations between majority-group members may therefore be crucial in shaping discrimination against minorities. 
In an experiment in India ($N$=3,397), non-transgender participants strongly discriminate against transgender workers when hiring for a grocery delivery, but a discussion with two non-transgender neighbors eliminates this discrimination in private post-discussion choices. The discussion is 1.7x more effective at reducing discrimination than information about transgender people's legal rights, and its effects partially persist after one month. The evidence is consistent with norm-based persuasion, in which pro-transgender participants are more vocal and create a perceived anti-discriminatory norm that reduces subsequent discrimination.

%In a field experiment in India (N=3,397), non-transgender participants strongly discriminate against transgender workers when hiring for grocery delivery.

%I examine how discrimination 




% (i) \textit{horizontal communication} between majority-group members, and (ii) \textit{top-down communication} from societal authorities. In an experiment in India ($N$=3,397), non-transgender participants are highly discriminatory against transgender workers when hiring for a grocery delivery. But a group discussion with two non-transgender neighbors eliminates this discrimination. The discussion is more effective at reducing discrimination than information about transgender people's legal rights, and its effects appear to be driven by interpersonal persuasion rather than virtue signaling. 



%Sometimes discriminatory behavior changes rapidly at the group level, suggesting that interpersonal communication about a minority may underlie such shifts. I examine how discrimination is affected by: (i) \textit{horizontal communication} between majority-group members, and (ii) \textit{top-down communication} from societal authorities. In an experiment in India ($N$=3,397), non-transgender participants are highly discriminatory against transgender workers when hiring for a grocery delivery. But a group discussion with two non-transgender neighbors eliminates this discrimination. The discussion is more effective at reducing discrimination than information about transgender people's legal rights, and its effects appear to be driven by interpersonal persuasion rather than virtue signaling. 
 
% MEDIUM VERSION 
%While individual prejudice can be hard to change, sometimes discriminatory behavior changes rapidly at the group level, suggesting that interpersonal communication may underlie such shifts. I examine how discrimination is affected by two types of communication about a minority: (i) \textit{horizontal communication} between majority-group members, and (ii) \textit{top-down communication} from agents of authority (e.g., the legal system). In a field experiment in urban India ($N$=3,397), non-transgender participants are highly discriminatory against transgender workers when hiring for a grocery delivery. But if they engage in horizontal communication – a group discussion with two non-transgender neighbors – this discrimination is eliminated on average, even in private post-discussion choices. The discussion is 1.7x more effective than information about the legal rights of transgender people, and the effects appear to be driven by interpersonal persuasion rather than virtue signaling. 

% LONGER VERSION
%Individual-level discrimination is often considered difficult to change, resulting from deep-seated prejudice or rigid beliefs. But sometimes we see rapid shifts in discriminatory preferences at the group level, suggesting that social interactions and communication may underly these shifts. I examine how discrimination is affected by two types of communication about a minority: (i) \textit{horizontal communication} between majority-group members, and (ii) \textit{top-down communication} from agents of authority (e.g., the legal system). I run a field experiment in urban India ($N$=3,397) that measures discrimination against a marginalized community of transgender people. Non-transgender participants are highly discriminatory: in a control condition, they sacrifice 1.9x their daily food expenditure to avoid hiring a transgender worker to deliver groceries to their home. But horizontal communication between non-transgender participants sharply reduces discrimination: if involved in a group discussion with two of their neighbors, they no longer discriminate on average, even in private post-discussion choices. The effect is 1.7x larger than the effect of top-down communication that informs participants about the legal rights of transgender people. The discussion's effects are not driven by virtue signaling or correcting a misperceived norm. Instead, participants \textit{persuade} each other to be more pro-trans, partly because pro-trans participants are the most vocal in discussions and make use of persuasive moral arguments.

%Individual-level discrimination is often believed to be the result of deep-seated prejudice or beliefs that are difficult to change. But sometimes at the societal-level, we observe rapid shifts towards less discriminatory attitudes, suggesting that organic communication about a minority between actors in an economy might be generating such shifts. I examine whether \textit{horizontal communication} between majority group members or \textit{top-down communication} from agents of authority can lead to rapid reductions in discrimination.


%In a field experiment in Chennai, India ($N$=3397), I test this idea in the context of discrimination against the transgender community, an economically marginalized and highly visible group.




% . In a control condition, discrimination is strong: participants on average sacrifice 1.9x their daily food expenditure to avoid selecting a transgender individual to deliver food to their home. But if they were earlier involved in a group discussion and collective hiring decision with two of their neighbors, they no longer discriminate at all, even when making subsequent choices in private. This effect is around \textbf{X}x stronger than top-down communication, in which participants are informed about the legal rights of transgender persons in India. I show evidence that the results are not driven by a pre-existing norm that leads participants to be more pro-trans. Instead, participants \textit{persuade} each other in the discussion to be more pro-trans, partly because pro-trans participants are most vocal in the discussions. 







%But sometimes the communication 
%	
%	- discrimination individual, hard to change
%	
%	- but sometimes rapid changes in discriminatory attitudes
%	
%	- horizontal communcation ? or top-down communication?
%	
%	- test these ideas in LGBTQ in India
%	
%	- strong discrimination
%	
%	- horizontal communication (group discussions) strong reduction
%	
%	- topd-won communication about legal rights of minorities - smaler reduction
%	
%	- mechanisms; not driven by pure virtue signaling story, instead people persuade each other and pro-trans people are most vocal
	
	
	
	
	
	
	
%	Discrimination is often believed to be the result of deep-seated prejudice against a minority, or of beliefs that can only change upon the revelation of new information. But social context --- in particular, how people behave differently in groups --- may be a more important determinant of discrimination than traditional theories of discrimination suggest. This paper shows that involving majority-group members in a group discussion and hiring decision can sharply reduce hiring discrimination against a stigmatized minority. I focus on discrimination against the transgender community in India, a highly visible and economically vulnerable group. In a control condition, participants on average sacrifice almost double their daily food expenditure to avoid selecting a transgender individual to deliver food to their home. But if they were earlier involved in a group discussion and collective hiring decision with two of their neighbors, they no longer discriminate at all, even when making subsequent choices in private. This effect is stronger than the effect of informing people about the legal rights of transgender people, and the reduction in discrimination partially persists until around 1 month later. The results appear to be driven by the emergence of a strong pro-trans norm in the groups, supported by pro-social reasons for selecting transgender workers that persuade others to discriminate less.
	
%	This information reduces discrimination more modestly.
%	
%	
%	
%	
%	
%	When discussing transgender workers, participants cite highly pro-social reasons for selecting them. I provide suggestive evidence that the asymmetric persuasive power of these pro-social reasons, together with a norm that encourages pro-trans communication, may underly the effects of the discussion.
	
	
%	 persuasive generate asymmetric persuasion in favor of transgender workers. 
	
%	When prejudice is common but socially unacceptable, it may be possible to reduce discrimination by encouraging discourse about stigmatized minorities. People in the majority group may bias their discourse to sound more pro-minority, and they may mutually convince each other to discriminate less. I test this using a field experiment, focusing on discrimination towards the transgender community in India. I show that in a control condition, cis-gender participants on average sacrifice more than their daily food expenditure to avoid selecting a transgender individual to deliver food to their home. But after they are involved in a group discussion with two of their neighbors about who to select, participants no longer discriminate at all, even when making subsequent choices in private. The preliminary findings suggest that these effects are partially driven by changes in group-level norms, which subsequently influence individuals' private attitudes.
%	\input{../../outputs/stats/effect_discussion_vs_law.tex}x

	
\end{abstract}

\vspace{1em}



%\linespread{1.25} 
\footnotesize 
\begin{center}
	 \vspace{0.5em}
	\textbf{JEL Codes:} \textit{J15, D83, J71, C93, K38, Z13} \vspace{-0.5em}
	
	\textbf{Keywords}: \textit{discrimination, communication, social interactions, transgender, legal rights, persuasion} 
\end{center}

\scriptsize
*Nova School of Business and Economics, dmbwebb@gmail.com. I'd like to thank my supervisors Karen Macours and Suanna Oh for all their fantastic advice and support, and Abhijit Banerjee, Esther Duflo, Supreet Kaur, and Frank Schilbach for their invaluable guidance throughout the project. I'd also like to thank five anonymous reviewers, along with Peter Andre, Sam Asher, David Atkin, Luc Behaghel, David Bernard, Francis Bloch, Leonardo Bursztyn, Janet Currie, Denis Cogneau, Pascaline Dupas, Oliver Vanden Eynde, Evan Friedman, Amory Gethin, Julien Grenet, Deivy Houeix, Nicolas Jacquemet, Seema Jayachandran, Eliana La Ferrara, Sylvie Lambert, Elisa Macchi, David Margolis, Edward Miguel, Eduardo Montero, Oda Nedreg\aa rd, Kate Orkin, Garima Sharma, Morten Nyborg St\o stad, Nick Otis, Tom Raster, Kailash Rajah, Gautam Rao, Matthew Ridley, James Robinson, Chris Roth, Raul Sanchez de la Sierra, Advik Shreekumar, Tavneet Suri, Simon Quinn, Jean-Marc Tallon, Lore Vandewalle, Liam Wren-Lewis, and many seminar participants at PSE, MIT, Berkeley, NEUDC, AFEDEV, the CEPR forum, and AFE for helpful comments and feedback. Akhilan Rengaswamy, Arun Balaji, Bhala Dhapandani, and Manvi Govil were invaluable in coordinating the fieldwork. This work has been generously supported by UK International Development, awarded through the J-PAL Crime and Violence Initiative, EUR-PgSE, CEPREMAP, the Development Research Group at PSE, the Weiss Fund, the Institute for Humane Studies (grant \# IHS017466), Funda\c{c}\~ao para a Ci\^encia e a Tecnologia (UID/00124/2025, UID/PRR/124/2025, Nova School of Business and Economics), and LISBOA2030 (DataLab2030 - LISBOA2030-FEDER-01314200). All errors are my own. This study was pre-registered in the AEA RCT Registry under the unique identifying number AEARCTR-0010953. The study was approved by the Institutional Review Boards at both Paris School of Economics and the Institute for Financial Management, Chennai.

\normalsize
\pagebreak	

%
%
%	\author{Duncan Webb\\Paris School of Economics
%		
%	\date{February 2023}                       
%	

	% Link styling for the main text. These are the same keys and values as the \hypersetup
	% call in the preamble.
	% NOTE(review): looks redundant unless something between the two calls changes the link
	% settings at this level -- confirm before removing.
	\hypersetup{%
	%colorlinks=false,% hyperlinks will be black
	colorlinks=true,
	citebordercolor=lightblue,
	%		urlbordercolor={1 1 1},
	%		urlbordercolor={1 1 0},
	urlbordercolor=lightblue,
	linkbordercolor=white,
	pdfborder={0 0 2},
	pdfborderstyle={/S/U/W 2},% border style: underline (/S/U) of width 2 (/W 2)
	linkcolor=lightred,
	filecolor=magenta,    
	urlcolor=blue,			% Color of links to web
	citecolor = black
}

	\setcounter{page}{1}
	\section{Introduction}
	
	
	
%	{\color{red}MAKE SURE FOCUS OF THE PARAG is on \textit{behavior} not attitudes and stuff}
	
	Discriminatory behavior harms equity and efficiency in a wide range of economic domains, including in firms \citep{hjortEthnicDivisionsProduction2014, gloverDiscriminationSelfFulfillingProphecy2017, hedegaardPricePrejudice2018}, the labor market \citep{charlesPrejudiceWagesEmpirical2008, hsiehAllocationTalentEconomic2019, folkeSexualHarassmentGender2022, ashrafGenderRolesMisallocation2022}, housing \citep{christensenDamagesDistortionsDiscrimination2023},  %healthcare \citep{angererDiscriminationHealthCare2018}, 
	and informal social interactions \citep{loweTypesContactField2020}. Standard theories of discrimination frame it as an individual's decision, based on beliefs or deep-seated preferences that are difficult to change \citep{beckerEconomicsDiscrimination1957,phelpsStatisticalTheoryRacism1972,arrowHigherEducationFilter1973,aignerStatisticalTheoriesDiscrimination1977}. But at the societal level, we sometimes observe rapid changes in discriminatory preferences \citep{kuranPreferenceFalsificationPolicy1987, fernandezCulturalChangeLearning2013, sunsteinHowChangeHappens2019}. 
%	
%	For example, multiple countries have seen rapid increases in the proportion of who 
%	
	For example, multiple countries have seen rapid increases in the proportion of people who accept interethnic marriage, homosexuality, and equal rights for women over the course of a single generation.\footnote{Specifically: (i) the proportion of people in the US saying that homosexuality is wrong dropped rapidly in a single year (1993) and continued to decline quickly over the following 30 years \citep{fernandezComingOutAmerica2019};
	(ii) the proportion of people in the UK indicating discomfort with interethnic marriage dropped from 55\% in 1983 to 25\% in 2013 \citep{park2014british}; and (iii) the proportion of people in Uganda saying that women should have equal rights rose from 63\% in 2002 to 80\% in 2012 \citep{chingwete2014support}.} Some of these changes may be driven by social contact \textit{between} majority and minority groups \citep{allportNaturePrejudice1954, paluckContactHypothesisReevaluated2019}. But in a world of homophily, where people mostly interact with others within their identity group \citep{jacksonOverviewSocialNetworks2011}, communication \textit{within} the majority group may also be a powerful determinant of change.
	
	
	
	
	
	
	
%	This suggests that some discrimination may be driven by malleable factors related to social interactions, such as social norms, a desire to conform, and communication between people.
	
%	\textcolor{red}{In this paper, I therefore test how generating communication between majority-group members about a minority (``\textit{horizontal communication}'') affects discrimination. While standard Bayesian theories typically suggest that communication would ``average out'' how discriminatory any communicators are subsequently with no mean change but a convergence in behavior (e.g., \citealp{geanakoplosWeCantDisagree1982}, and using the martingale property of Bayesian updating).
% However, horizontal communication could also generate average shifts in post-communication behavior if there are asymmetries between pro- and anti-minority communication and/or how it is interpreted. For example, pro-minority communicators may be more vocal than discriminators in a discussion if they have stronger preferences against discrimination or because it is socially frowned upon to openly advocate for discrimination. If they are persuasive, this may imply that others discriminate less after the discussion. Of course, it could go the other way, in line with theories of groupthink in which communication typically amplifies any existing discrimination \citep{myersGroupPolarizationPhenomenon1976}.}
 
% \textcolor{red}{cite Myers / Lamm}
 
	
	In this paper, I therefore test how communication between majority-group members about a minority (``\textit{horizontal communication}'') affects discrimination. Standard models typically predict that communication merely averages out individual beliefs, leading beliefs to become more similar within a group, but not changing the mean belief or resulting average behavior.\footnote{For example, agents who truthfully share information will eventually converge to the same posterior belief in standard Bayesian models \citep{aumannAgreeingDisagree1976, geanakoplosWeCantDisagree1982}. And if agents' priors are not systematically incorrect, the martingale property of Bayesian updating implies that their beliefs should not systematically shift in one direction.} Yet horizontal communication could lead to large shifts in post-communication discrimination if there are asymmetries between pro- and anti-minority communication. For example, pro-minority individuals might be more persuasive, using moral arguments in their favor; anti-minority people might have stronger preferences against minorities, and so choose to speak up more in discussions; or anti-minority individuals may be unwilling to express discriminatory views if they are concerned about how they are perceived by others. Such asymmetries could drive changes in post-communication discrimination if communication affects people's attitudes, or beliefs, or the norm of behavior towards a minority.
	
%	  communicators may be more vocal than anti-minority, driven by stronger preferences against discrimination or weaker social sanctions from pro-minority behavior.  
	
	
	
%	 in who chooses to speak up
	
	
		
	
	
	
	
	
	
	
	
	 
	
	
	
	
	
	
%	While theories of groupthink suggest that communication typically amplifies any existing discrimination \citep{myersGroupPolarizationPhenomenon1976}, communication could also reduce discrimination through several channels. For example, even when discrimination is common, it can be socially unacceptable to discriminate. People may therefore tilt their communication in favor of a minority to avoid being \textit{perceived} as discriminatory, and may consequently convince others to be less discriminatory. %\footnote{This idea mirrors the logic of political correctness in other settings \citep{morrisPoliticalCorrectness2001, braghieriPoliticalCorrectnessSocial2021, golmanAcceptableDiscourseSocial2022}. 
	%If it is socially unacceptable to be openly racist, people may generate more favorable narratives about ethnic minorities, and thereby persuade others to discriminate less.}
%Another channel focuses on \textit{who} communicates in a group: if those who are supportive of a minority are particularly vocal in a discussion, they may persuade others to discriminate less.
%	
%	
%	 A second channel focuses on \textit{who} communicates in a group. Those who are more supportive of a minority may be particularly vocal in a discussion, because they are further from the discriminatory status quo, and thus have greater incentive change other people's minds. Instead of revealing themselves to be the odd one out with a pro-minority attitude, they may be able to change other people's attitudes to be ``on their side'' too, thus avoiding the social cost of deviating too far from their group. If pro-minority voices dominate and are persuasive, it could lead to an overall decrease in discrimination.
%	








%may have the power to communicate in a way about minorities that generates top-down changes in norms and therefore discrimination. In particular, I examine whether communicating about the legal rights of minorities can act as a signal that discrimination is not socially unacceptable, and thus reduce discrimination \textbf{(CITE LAWS + NORMS)} {\color{red}[not enough link between the two]}


%to generate top down changes in norms and therefore reduce discrimination. In particular, the legal system can give minorities more legal status, thus signaling to society that discrimination is not socially acceptable (\textbf{and leading to equilibrium changes in attitudes}), implying that \textit{communicating} about these rights can generate equilibrium changes... 


%In this paper, I explore a number of channels through which the interplay of norms and communication can lead to equilibrium changes in discrimination.  


	
%	In this study, I therefore ask whether rapid reductions in discrimination can be driven by generating organic communication about a minority, or by increasing the awareness of legal recognition for a minority. {\color{red}more detail on research Qs?}


I run a field experiment in urban Chennai, India ($N$=$\input{../../outputs/stats/total_n.tex}$) that tests these ideas in the context of discrimination against the most visible LGBTQ+ group in India: a community of transgender women known locally as \textit{thirunangai}. This setting is an appropriate context in which to study the effect of communication on discrimination. The community is vulnerable to extensive economic discrimination and violence \citep{u.s.statedept.2021CountryReports2021}, and their distinct visual identity and historic role in Indian society make them highly recognizable -- allowing me to measure discrimination only by showing photos. At the same time, there appears to be nascent social change towards greater acceptance of transgender people. This may create conditions in which communication reduces discrimination: for example, despite strong \textit{de facto} discrimination, there is also widespread agreement that discrimination is wrong.\footnote{Despite observing substantial hiring discrimination in my control group, 	\input{../../outputs/stats/prop_attitude_control_round.tex} of that same control group say that discrimination is unacceptable in response to a vignette that showcases explicit discrimination. There appears, therefore, to be a wedge between the descriptive norm (how much people actually discriminate) and the prescriptive norm (to what extent people think it is right or wrong to discriminate).
	} This raises the possibility that pro-trans individuals may be more vocal or more persuasive in such settings.
	
	
%	may try to \textit{appear} to be more pro-trans in a group setting, or that , thus persuading others to discriminate less.
%	raising the possibility that people may try to \textit{appear} to be more pro-trans in a group setting and therefore persuade others to discriminate less. 
	
	
	
	
	





 













%{\color{red}LINK PHRASE One such minority that faces persecution across much of the world is LGBTQ+ individuals.\footnote{For example, 175 out of 190 of the country-level US State Department's 2022 human rights reports indicate some form of discrimination or violence against LGBTQ+ persons \citep{unitedstatesstatedepartment2022CountryReports2022}.} Even though anti-LGBTQ+ persecution incurs significant societal costs \citep{badgett2014economic, badgettRelationshipLGBTInclusion2019}, very little research in lower- and middle-income countries has examined policy levers to reduce it (\citealp{badgettLGBTQEconomics2021}).} 


%My study focuses on the most visible LGBTQ+ group in India: a community of people called \textit{thirunangai}, primarily composed of transgender women. Their distinct visual identity and historical role in Indian society make them highly recognizable, but also make them vulnerable to extensive economic discrimination and violence \citep{u.s.governmentstatedept.2021CountryReports2021}. This setting is an ideal context to study the effect of communication on discrimination, since there appears to be nascent social change in favor of more acceptance of transgender people. Firstly, despite strong \textit{de facto} discrimination, discrimination also appears to be socially unnacceptable,\footnote{For example, despite observing substantial hiring discrimination in my control group, 	\input{../../outputs/stats/prop_attitude_control_round.tex} of that same control group say that discrimination is unacceptable in response to a vignette that showcases explicit discrimination. There appears, therefore, to be a wedge between the descriptive norm (how much do people actually discriminate?), and the prescriptive norm (to what extent do people think it is right or wrong to discriminate?).
%	
%	\citet{bicchieriNormsWildHow2017} gives a more precise definition of descriptive and prescriptive norms, along with other examples of when they can diverge. Crucially the definitions incorporate \textit{second-order beliefs}, i.e. beliefs about what other people's beliefs and attitudes are.  - for example, 
%	} raising the possibility, that people may try to appear to be more pro-trans in a group setting and therefore persuade others to discriminate less. In addition, recent legal advances, such as a Supreme Court ruling in 2014, have affirmed that transgender individuals have fundamental rights, {\color{red}a ruling that could act as a strong signal in favor of transgender individuals without directly impacting policy.}

%
%At the same time, there appear to be norms that discourage such discrimination,
%	raising the possibility that group settings may reduce discrimination significantly. {\color{red}\textbf{why is this a good setting?? - put this somewhere}}


%	are uncomfortable with a current discriminatory status quo may have a greater incentive a greater incentive to speak out in favor of a minority, relative to those comfortable with the status quo. 
	
%	Fear of being the odd one out...
%	If I don't think I can change others minds, then being pro-minority will make me odd one out. But if I can change others' minds, then I can get everyone on my side and I won't be so deviant....
	
	
	
			
	
	
%	uncomfortable with the status quo - greater incentive to speak out...
	
	
	
%	--------------------------------
	
	

	
%	{\color{red}
%			
	
	
%	 Discriminatory decisions are not taken by individuals in a social vacuum, as in standard economic theories \citep{beckerEconomicsDiscrimination1957,phelpsStatisticalTheoryRacism1972,arrowHigherEducationFilter1973,aignerStatisticalTheoriesDiscrimination1977}. 
	 
%	 Instead, discrimination may be highly influenced by people's social context, implying that \textit{groups} may behave very differently towards minorities than \textit{individuals}. For example, even when discrimination is common, it can be socially unacceptable to discriminate, implying that people in groups may bias their communication in favor of a minority in order to not be perceived as a discriminator. 
	%This links to a large literature suggesting that social image concerns -- concerns about how one is perceived by others -- are a key driver of behavior in a wide variety of economic domains \citep{benabouIncentivesProsocialBehavior2006, dellavignaVotingTellOthers2017, bursztynSocialImageEconomic2017, bursztynHowDoesPeer2015, bursztynExtremeMainstreamErosion2020, bursztynJustifyingDissent2023, karingSocialSignalingChildhood2021, bursztynStatusGoodsExperimental2018, bursztynActingWifeMarriage2017}.
%	}
	
%In this study, I explore how groups affect discrimination by asking: when majority-group members participate in group discussions, does this reduce subsequent discrimination against a minority in real-stakes hiring decisions? 

%	\citep{chakrapaniBarriersFreeAntiretroviral2011, sharmaChangingLandscapeSexual2014,  shivakumar2014markers, agoramoorthyLivingSocietalEdge2015,  malLetUsLive2015, ganjuStigmaViolenceHIV2017, malMolestationBengaliHijras2018, halliSuicidalityGenderMinorities2021, }. 
	

%}	
	
	
%	 \textbf{raising the possibility that people will persuade each other in groups....}
	
	
	
%	---------------------
	
%I run a field experiment ($N=\input{../../outputs/stats/total_n.tex}$) in urban Chennai, India, in which 


%-------------------


I first evaluate whether horizontal communication can affect discrimination by randomizing whether participants are involved in a group discussion with two of their neighbors. I measure the effect of this discussion on anti-transgender discrimination in a series of private, individual hiring choices after the discussion has ended. Participants are offered a free grocery delivery, and make a series of binary choices over the worker who will carry out the delivery (along with the items they will receive, which are randomly varied across choices). 
Participants who do not take part in a discussion are highly discriminatory: they are \input{../../outputs/stats/baseline_discrim_round.tex}  percentage points (\input{../../outputs/stats/baseline_discrim_perc.tex}) less likely to hire transgender workers than non-transgender workers ($p$$<$0.001). Their choices imply that they are willing to sacrifice grocery items worth \input{../../outputs/stats/wtp_control_hh_exp.tex}x the median daily food expenditure to avoid interacting with a transgender worker for 15 minutes. 


%measure anti-transgender discrimination in a series of private, individual hiring choices. Participants are offered a free grocery delivery, first evaluate the role of organic horizontal communication by randomizing whether participants are involved in a group discussion with two of their neighbors before making individual hiring choices.





% I measure the effect of this discussion on anti-transgender discrimination in a series of subsequent private, individual hiring choices. 

%I measure 

%I measure discrimination against transgender delivery workers by offering participants a free grocery delivery, and then asking them to make binary choices over both the items they will receive and the worker who will carry out the delivery. 
	
%	(\textbf{slightly more on the design here})
	
%	Participants make a series of 10 binary choices between two worker-and-item bundles, knowing that one of these choices will be randomly selected to be implemented. 
	
%	\textbf{ADD 3 neighbors}
	
	
%	{\color{red}I show that the communication generated by participants who are on average privately discriminatory can lead to large reductions in subsequent discrimination.}



Horizontal communication leads to large short-run reductions in subsequent discrimination: in private choices after a discussion, there is no discrimination against transgender workers on average. In the discussion condition, participants discuss a series of hiring options as a group of three neighbors and are asked to make collective hiring choices.\footnote{Including hiring choices in the discussion reduces the possibility of cheap talk by increasing the stakes of the discussion. It also implies that I measure the bundled effect of (i) observing others' choices and (ii) listening to the arguments made in the discussion. I later show evidence that both matter: observing others' choices without a discussion moderately reduces discrimination, while features like how much \textit{morality} was invoked in the arguments correlate with reduced discrimination even when controlling for choices.}$^{,}$\footnote{Groups were always same-gender to avoid overly hierarchical relationships between group members. To test for the effects of \textit{within-network} horizontal communication, to make the discussions more naturalistic, and to ensure social image concerns could affect behavior, I recruited neighbors who knew each other \input{../../outputs/stats/prop_neighbours_knew_each_other.tex} of the time.} Since some of these options include transgender workers, participants naturally discuss whether to hire transgender workers. Crucially, the only communication about transgender people in this discussion comes from the participants themselves, rather than from the discussion facilitator.
% even though the enumerator who leads the discussion never mentions the word "transgender" themselves. 


%
The effects of this discussion on discrimination are stark: in people's private, post-discussion hiring choices, participants are \input{../../outputs/stats/main_treatment_effect.tex} p.p. (\input{../../outputs/stats/main_treatment_effect_perc.tex}) more likely to select a transgender worker than the control group ($p$$<$0.001), implying that anti-transgender discrimination is reduced to 0 on average ($p$ of difference between transgender and non-transgender: \input{../../outputs/stats/treatment_discrim_p.tex}). The effects are also partially persistent, although there is substantial fadeout: when I re-survey participants approximately 1 month later, discussion participants are \input{../../outputs/stats/effect_discussion_follow_up.tex} p.p. more likely to select transgender workers than the control group in a series of hypothetical hiring choices ($p$=\input{../../outputs/stats/p_val_discussion_follow_up.tex}).


%{\color{red}----------}



%------

%\textcolor{red}{VIDEOS
%Second, I test whether communication about a minority from agents of authority (``\textit{top-down communication}'') can affect discrimination. One of the most powerful forms of such communication comes from the legal system: when minorities are granted legal status and this is communicated to citizens, this could act as a strong signal that discrimination is no longer socially acceptable, thus reducing discrimination \citep{sunsteinExpressiveFunctionLaw1996, mcadamsNormsLawEconomics2004, benabouLawsNorms2011}. Conversely, when discrimination is institutionalized and perpetuated by the legal system (e.g., in apartheid regimes), the same mechanism could work to amplify discrimination among the populace.
%In addition, recent legal advances have affirmed that transgender individuals have fundamental rights, but awareness of these advances is low, enabling me to test whether communicating about legal rights can reduce discrimination.}
%
%
%
%
%-----

	
I then compare the effects of the discussion with communication about a minority from an agent of authority, namely, the legal system (``\textit{top-down communication}''). I cross-randomize whether participants watch a video informing them about an Indian Supreme Court ruling that affirmed that transgender people have all the same fundamental rights as other citizens, including freedom from discrimination.\footnote{There are no interaction effects between the legal rights video and the group discussion ($p \in [\input{../../outputs/stats/p_vals_vid_discuss_interact.tex}]$). The effect of the discussion is also not driven by interaction effects; it is robust to only using the sample who were not informed about transgender rights.
}
%{\color{red}The law can act as a signal that discrimination is not socially acceptable, therefore exogenously manipulating a key aspect of social context---the perceived social norm---and allowing me to compare with the endogenous changes in norms that occur due to horizontal communication.
%}
Such communication could affect discrimination if it signals that discrimination is not socially acceptable \citep{sunsteinExpressiveFunctionLaw1996, mcadamsNormsLawEconomics2004, benabouLawsNorms2011} or if participants fear legal enforcement. Learning about these legal rights also lowers discrimination by \input{../../outputs/stats/effect_law_video.tex} p.p. ($p$$<$0.001). %They also discriminate less than others who are shown a video with persuasive messaging that advocates for the rights of transgender people without mentioning the law ($p$ of difference $\in [\input{../../outputs/stats/p_val_min_video_messaging_vs_law.tex}, \input{../../outputs/stats/p_val_max_video_messaging_vs_law.tex}]$, depending on the specification), suggesting that the legal authority of the Supreme Court may play some role. 
But the effects are only \input{../../outputs/stats/effect_law_vs_discussion.tex} as large as the effects of the group discussion ($p$ of difference $<$ 0.05), and do not persist after 1 month. In this context, therefore, horizontal communication about transgender people is substantially more effective than top-down information about the law. 


%In line with the expressive law hypothesis, participants who are told about these legal rights discriminate less against transgender people.
	
	
%	To further explore how perceived social context affects discrimination, I compare the effects of the endogenous communication that arises within groups to an exogenous shock to beliefs, attitudes, and norms. To do this,  
%I evaluate the effect of informing individuals about the legal rights of transgender people. Building off a literature on the \textit{expressive law hypothesis} that proposes that the law can change people's behavior by signaling a prevailing social norm \citep{sunsteinExpressiveFunctionLaw1996, benabouLawsNorms2011, mcadamsNormsLawEconomics2004}, I cross-randomize whether participants are given information about the recent advances in the legal rights of transgender people. Specifically, I inform them about an Indian Supreme Court ruling that affirmed that transgender people have all the same fundamental rights as other citizens, including freedom from discrimination.
% In line with the expressive law hypothesis, participants who are told about these legal rights discriminate less against transgender people.
% These participants also discriminate less than others who are shown persuasive messaging that advocates for the rights of transgender people---saying that they \textit{should} have rights, but without saying that they \textit{do}---suggesting that the legal authority of the Supreme Court plays an important role. However, the effects of explaining the law are only \input{../../outputs/stats/effect_law_vs_discussion.tex} as large as the effects of involving participants in a group discussion. In this context, therefore, allowing endogenous communication about transgender people to take place is a much more effective means of reducing discrimination than trying to actively reduce discrimination using top-down information. In line with this, the effects of information about legal rights do not persist when measured around 1 month later.

%I use a number of additional results to explore the mechanisms behind the effects of the discussion and the transgender rights information. 

%Interaction between norms and persuasion...



%\textcolor{red}{NEW MECHANISMS}

%I then seek to understand the mechanisms underlying the large reductions in discrimination after horizontal communication. Overall I show that the mechanism best supported by the data is that pro-trans participants are more willing to speak up in the discussions, and that they influence others to discriminate less by signalling a strong pro-trans norm.

%I then seek to understand the mechanisms underlying the large short-run reductions in discrimination after horizontal communication. 

I then seek to understand why discussions reduce discrimination in this context. The suggestive evidence is consistent with \textit{norm-based persuasion} as the primary mechanism, in which pro-transgender participants speak up more during discussions, use especially persuasive moral arguments, and thus generate a perception of a strong anti-discriminatory norm that persuades others to discriminate less in their subsequent private choices.\footnote{The implied \textit{persuasion rate} (\input{../../outputs/stats/persuasion_rate.tex}) falls at the upper end of the range in the literature \citep{dellavignaPersuasionEmpiricalEvidence2010}, but remains consistent with other successful in-person interventions and is plausible given the short-term nature of the effects.}


%\textcolor{red}{The implied \textit{persuasion rate} (\input{../../outputs/stats/persuasion_rate.tex}) is at the high end of the values in the \citep{dellavignaPersuasionEmpiricalEvidence2010}, but is in line with other in-person interventions and is plausible especially given the short-run nature of the main effects }


I first use a mechanism treatment to show the effects are driven by interpersonal persuasion rather than self-persuasion \citep{schwardmannSelfPersuasionEvidenceField2022}. Silent ``listeners'', who listen to but do not participate in discussions, discriminate 13 p.p. less afterward ($p$$<$0.001) -- an effect statistically indistinguishable from that of active participants. This finding supports interpersonal persuasion since listeners do not speak in the discussion and yet still show treatment effects as strong as participants. Consistent with this, the more pro-transgender statements a listener hears from others, the less she subsequently discriminates.


%I first use mechanism treatments to show that within-discussion behavior affects post-discussion choices through norm-based persuasion. 





%One mechanism treatment supports \textit{interpersonal} persuasion rather than \textit{self}-persuasion \citep{schwardmannSelfPersuasionEvidenceField2022} as the primary mechanism. In a treatment with silent ``listeners'', who listen to but do not participate in discussions, these listeners discriminate 13 p.p. less afterward ($p$$<$0.001) -- an effect statistically indistinguishable from that of active participants. Listeners do not engage in the discussion and yet still show strong treatment effects, consistent with interpersonal persuasion. 

%This finding points to , since listeners do not engage in the discussion and yet still show strong treatment effects. Consistent with this, the more pro-transgender statements a listener hears from others, the less she discriminates subsequently.


%\textcolor{red}{NEED TO EXPLAIN WITHIN-GROUP NORMS MEASUREMENT}

Second, I use correlational evidence to show that persuasion appears to operate through changes in perceived descriptive norms -- shifts in what people expect others to do. Norm updates alone suffice to reduce discrimination: in a treatment without discussions, ``observers'' learn their group members' first-round choices, shift their beliefs about within-group norms, and subsequently discriminate less. Specifically, participants predict that their groups will privately discriminate 4.5 p.p. less if they have observed others' choices, and 
% these observers update their beliefs about the norm of discrimination within their group, predicting that their group members will privately discriminate 4.5 p.p. less in their later private choices than in the control group.
%perceived within-group norms shift toward selecting transgender workers (participants predict that their groups will privately discriminate 4.5 p.p. less if they have observed others' choices). 
this small norm update translates one-for-one into a 4.5 p.p. reduction in discrimination in observers' own private second-round choices. %For each additional group member observed selecting a transgender worker, observers increase their own selection by \input{../../outputs/stats/effect_of_announce_discrim_het_coeff.tex} p.p. 
This direct relationship between perceived norms and post-discussion discrimination confirms that updating perceived descriptive norms---even without the rich interaction of discussion---can reduce discrimination. I then show that the discussion generates large shifts in these perceived norms (24 p.p., $p$$<$0.001), while producing minimal changes in attitudes or beliefs about transgender workers. Since these norm shifts strongly mediate the discussion's effects on discrimination, this is consistent with norm updates being a key driver of the discussion's effects.

% while not generating large shifs in attitudes or beliefs about transgender workers, and that these shifts mediate the discussions' effects on post-discussion discrimination, in line with norm-based persuasion.


%\textcolor{red}{UP TO HERE}


%The discussion also generates large norm shifts ( in favor of transgender workers, while having minimal impact on attitudes or beliefs about transgender workers. These norm shifts mediate the discussion's effects on discrimination, confirming that norm updating—even without rich interaction—effectively reduces discrimination.

%p<0.001), an effect identical to active participants'. Their discrimination decreases in direct proportion to the number of pro-transgender statements they hear. This indicates interpersonal persuasion drives the effects rather than self-persuasion, as listeners who never speak still change their behavior substantially after observing discussions.

%I first use mechanism treatments to show why within-discussion behavior impacts post-discussion behavior. 


%\textcolor{red}{summary}
%The evidence supports \textit{norm-based persuasion} as the primary mechanism, in which pro-transgender participants speak up more during discussions, are more likely to use especially persuasive moral arguments, and other participants do not correctly adjust for this. This behavior thus generates a perception of a strong anti-discriminatory norm that influences others' subsequent private choices.

%I first document how behavior during the discussion influences post-discussion discrimination. I use mechanism treatments to show evidence consistent with \textit{norm-based persuasion}. First, silent ``listeners'' who observe but do not participate in discussions discriminate 13 p.p. less afterward ($p$$<$0.001) -- an effect statistically indistinguishable from active participants. The more pro-transgender statements a listener hears, the less she discriminates subsequently. These findings point to interpersonal persuasion rather than self-persuasion (\textcolor{red}{CITE}) as the primary mechanism, since listeners do not engage in the discussion and yet still show strong treatment effects.





%How does this pro-trans behavior during the discussion influence post-discussion choices? 

%First, I show evidence of inter-personal persuasion using a treatment arm in which one participant silently listens to two other people engaging in a discussion. These silent ``listeners'' discriminate 13 p.p. ($p$$<$0.001) less after the dsicsussion (an effect that is not significantly different from participants who actively participate in a discussion), and listening to pro-trans discussions is associated with greater reductions in discrimination. These findings point to interpersonal persuasion rather than self-persuasion (or any other mechanism requiring personal involvement in the discussion) as the primary mechanism.

Norm-based persuasion can explain how participants influence each other's private behavior, but not why discussions create a large mean shift \textit{away} from discrimination in this context. Standard communication models predict that communication should ``average out'' behavior -- behavior should converge within a group, with no systematic change in mean behavior (see Section~\ref{sec_mechanisms_asymmetry}). The substantial reduction in discrimination therefore requires sources of \textit{asymmetry}, whereby pro-transgender and anti-transgender forces do not simply cancel out. I use correlational evidence to document three potential sources of such asymmetry.

First, privately pro-trans participants speak up more frequently during discussions.  Those who privately select more transgender workers after discussions (a proxy for pro-trans private preferences)\footnote{I did not include baseline measures of discrimination in order to minimize any priming or experimenter demand effects before eliciting the main discrimination outcome, so only post-discussion choices are used in this analysis. Baseline and endline attitudes are likely to be positively correlated under a reasonable assumption: that pro-trans participants are not \textit{so} persuasive that they persuade others to be even more pro-trans than themselves. Nevertheless, the lack of baseline measures is an important limitation of this analysis.} are \input{../../outputs/stats/perc_spoke_first.tex} more likely to speak first and  \input{../../outputs/stats/perc_dominant.tex} more likely to dominate discussions involving transgender workers. This asymmetric behavior appears to be motivated by a particular desire to influence others, rather than by social image concerns -- pro-trans participants are more likely to self-report intentions to shape others' choices than other participants are. Pro-trans statements thus occur \input{../../outputs/stats/ratio_pro_anti_transcripts.tex}x more frequently than anti-trans statements in the transcripts. This is also driven by the substantial minority of pro-trans participants: despite strong average discrimination, \input{../../outputs/stats/pref_prob_control.tex} of control participants still have a positive willingness to pay to select a transgender worker.

%There is a substantial minority of pro-trans participants, despite strong average discrimination:  \input{../../outputs/stats/pref_prob_control.tex} of control participants have a positive willingness to pay to select a transgender worker.

%Pro-transgender participants appear to speak up more because they want to persuade others not to discriminate—they report \textcolor{red}{significantly higher desires} to influence group members' choices compared to other participants. This results in pro-trans statements being overall \input{../../outputs/stats/ratio_pro_anti_transcripts.tex}x more common than anti-trans statements based on transcript data.



%The evidence above suggests that participants affect each other's private post-discussion behavior primarily by persuading each other to change behavior by shifting perceptions of the norm of behavior towards transgender workers. However, we still need to explain the effect involves a large mean shift away from discrimination. Standard models of Bayesian communication would indicate that post-communication behavior should converge and there should be no mean shifts in behavior, so to explain the results we need to posit some asymmetry that means that pro- and anti-trans forces do not simply cancel out. I use the extensive mechanism outcomes to show suggestive evidence of three sources of such asymmetry.

%First, behavior in the discussion appears to be driven by pro-trans participants speaking up more than anti-trans participants. Participants who privately select more transgender workers after discussions (a proxy for pro-trans private attitudes)\footnote{I did not include baseline measures of discrimination in order to minimize any priming or experimenter demand effects before eliciting the main discrimination outcome, so only post-discussion choices are used in this analysis. \textcolor{red}{DESCRIBE THE LIMITATION}} are \input{../../outputs/stats/perc_spoke_first.tex} more likely to speak first and  \input{../../outputs/stats/perc_dominant.tex} more likely to dominate when discussing a choice that includes a transgender worker. Pro-transgender participants appear to speak up more because they want to persuade others not to discriminate—they report \textcolor{red}{significantly higher desires} to influence group members' choices compared to other participants. This results in pro-trans statements being overall \input{../../outputs/stats/ratio_pro_anti_transcripts.tex}x more common than anti-trans statements based on transcript data. 


%\textbf{Pro- and anti-trans statements in discussion.} Participants also on average  communicated about transgender workers in a positive way. Based on transcripts with a sentence-level indicator of arguments in favor of or against the transgender worker, participants were \input{../../outputs/stats/ratio_pro_anti_transcripts.tex}x more likely to make pro-trans than anti-trans statements (\input{../../outputs/stats/mean_pro_transcripts.tex} sentences were pro-trans, vs. \input{../../outputs/stats/mean_anti_transcripts.tex} sentences were anti-trans, $p$ of difference \input{../../outputs/stats/pval_pro_transcripts.tex}).

Second, I show suggestive evidence of asymmetric inference about norms: participants more readily shift toward a perception of pro-trans norms than away from them. Discussions \textit{generate} misperceptions -- participants overestimate how pro-trans their group members are after a discussion, reversing the underestimation seen in the control group. This suggests that participants fail to account for pro-trans participants' greater propensity to speak up, thus overestimating the prevalence of pro-trans behavior. Descriptive evidence also suggests that participants exhibit \textit{silence neglect}, updating beliefs when hearing explicit pro-trans statements but neglecting the information contained in silence. Participants who hear \textit{no} pro-transgender statements during discussions show no change in perceived norms or behavior, suggesting that silence carries little informational weight, even though the control group's results show that silent participants are largely anti-trans.  These patterns are consistent with a ``spiral of silence'' \citep{noelle-neumannSpiralSilenceTheory1974, huangBreakingSpiralSilence2023}, in which selective communication leads people to perceive a view as more prevalent than it truly is. They also link to work on misperceived social norms \citep{bursztynMisperceivedSocialNorms2020}, but with a distinctive feature---communication itself generates the misperception. 

%
%leads to misperceived prevalence of views, but with one key difference—the discussion itself generates the misperception.
%
%
%
%
% Second, I find evidence of ``silence neglect'', the phenomenon whereby people update beliefs when hearing explicit statements but neglect the information contained in silence, that could indicate anti-trans attitudes. Participants who hear \textit{no} pro-transgender statements during discussions show no change in perceived norms or behavior, confirming that silence carries little informational weight, even though those that stay silent are largely \textit{anti}-trans. These pieces of evidence are consistent with the ``spiral of silence'', where selective communication leads people to perceive a view as more prevalent than it truly is \citep{noelle-neumannSpiralSilenceTheory1974, huangBreakingSpiralSilence2023}, and with theories of misperceived norms \citep{bursztynMisperceivedSocialNorms2020}, but with a distinct feature---the discussion itself generates the misperception, rather than merely correcting an existing one. 

 



%
%While participants initially underestimate support for transgender workers, discussions not only correct this misperception but flip it, creating an overestimation of pro-transgender sentiment among group members. This suggests that participants do not sufficiently account for pro-trans participants being more likely to speak up in discussions. 
%
%
%Consistent with this, there is evidence of ``silence neglect'', in which people update beliefs when hearing explicit statements but underweight the information contained in silence.
%
% participants appear to update their beliefs when hearing pro-transgender statements, but don't fully account for silence that indicates opposition. 
%
%
%
%
%Participants fail to appropriately discount pro-trans members' disproportionate participation, creating an unbalanced updating pattern--
%
%
%
%This asymmetry transforms an initial underestimation of transgender support into an overestimation after discussions. "Silence neglect" amplifies this imbalance: participants update beliefs when hearing pro-transgender statements but ignore the meaning of silence as potential opposition. Those hearing no pro-transgender advocacy show no changes in norms or behavior, revealing how verbal support carries substantially more weight than silent disagreement in shaping perceptions.
%
%Second, I show evidence consistent with incorrect inference, in which participants do not sufficiently adjust for the fact that pro-trans participants speak up more, and therefore develop a misperception of a pro-trans norm in their group, i.e. they asymmetrically update towards a pro-trans norm but not away from it.  Discussions generate much larger norm updates than simply observing others' choices because they create a misperception of pro-transgender norms. Participants initially underestimate support for transgender workers, believing discrimination is more common than it actually is. Discussions not only correct this misperception but flip it, creating an overestimation of pro-transgender sentiment. This suggests that participants do not sufficiently account for the fact that pro-trans participants are more likely to speak up in discussions. Consistent with this, there is evidence of ``silence neglect'': participants appear to update their beliefs when hearing pro-transgender statements but may not fully account for silence that could indicate opposition. Specifically, participants who hear no pro-transgender statements during discussions show no change in perceived norms or behavior, confirming that silence carries little informational weight. Broadly, this mechanism is thus consistent with a theory of ``spiral of silence'', where selective communication leads people to perceive a view as more prevalent than it truly is \citep{noelle-neumannSpiralSilenceTheory1974, huangBreakingSpiralSilence2023}. It also builds on theories of misperceived norms \citep{bursztynMisperceivedSocialNorms2020}, but with a distinct feature---the discussion itself generates the misperception, rather than merely correcting an existing one.

Finally, I analyze the discussion transcripts to unpack the black box of the discussion and show suggestive evidence of \textit{rhetorical asymmetry} \citep{schkadeDeliberatingDollarsSeverity2002}, in which some arguments are more persuasive than others. In particular, the evidence is consistent with pro-trans arguments being more persuasive than anti-trans arguments because they more often invoke moral language and principles. Participants frequently invoke moral concepts when discussing transgender workers, using language about ``opportunities,'' ``rights,'' and ``equality.'' These moral arguments are correlated with both larger changes in perceived norms and greater reductions in discrimination, even after controlling for within-discussion choices.\footnote{Since being in a group with others with a persuasive personality does not predict greater reductions in discrimination, this indicates that it is the \textit{arguments} rather than the \textit{people} that are more persuasive.} These findings align with research on moral reframing, which shows that arguments framed in moral terms are especially persuasive when they resonate with listeners' values \citep{voelkelMorallyReframedArguments2018, feinbergMoralReframingTechnique2019, kallaPersonalizingMoralReframing2022}.
%, in which pro-trans arguments are (in this context) more persuasive than anti-trans arguments because they invoke morality and are extra persuasive. Participants frequently invoke moral concepts when discussing transgender workers, using language about ``opportunities," ``rights," and ``equality." These moral arguments predict both larger changes in perceived norms and greater reductions in discrimination, even after controlling for within-discussion choices. These findings align with research on moral reframing, which shows that arguments framed in moral terms are especially persuasive when they resonate with listeners' values \citep{voelkelMorallyReframedArguments2018, feinbergMoralReframingTechnique2019}.


%When discussing transgender workers, participants frequently use moral concepts, speaking about "opportunities," "rights," and "equality." These moral arguments predict stronger norm changes and larger reductions in discrimination, even after controlling for within-discussion choices. This pattern connects to research on moral reframing, which shows that arguments framed in moral terms persuade more effectively when they align with listeners' values. The transcript analysis reveals how moral language likely strengthens the norm-updating process by providing compelling ethical frameworks that justify anti-discriminatory behavior.

%I unpack the black box of the discussion by analyzing transcripts, showing that persuasion is partly driven by \textit{what people say}; the discussions' effects are not driven solely through observing others' choices. Participants frequently invoke moral concepts when discussing transgender workers, using language about "opportunities," "rights," and "equality." These moral arguments predict both larger changes in perceived norms and greater reductions in discrimination, even after controlling for within-discussion choices. These findings align with research on moral reframing, which shows that arguments framed in moral terms are especially persuasive when they resonate with listeners' values \citep{voelkelMorallyReframedArguments2018, feinbergMoralReframingTechnique2019}.

%I find evidence of \textit{silence neglect}, in which participants neglect the informational content of others' silence, failing to account for the fact that silent group members are  anti-trans rather than neutral. When no-one else says anything positive about a transgender worker option, participants predict that their group members are more anti-trans, and instead do not update at all. And I also find evidence of a generated misperception






 %Participants overestimate support for transgender workers after discussions because pro-transgender voices dominate the conversation, and listeners fail to account for the informational value of silence from participants with anti-transgender attitudes.





%\vspace{3em}

%---------------





%\textcolor{red}{This mechanism builds on theories of misperceived norms \citep{bursztynMisperceivedSocialNorms2020}, but with a distinct feature---the discussion itself generates the misperception, rather than merely correcting an existing one. Participants overestimate support for transgender workers after discussions because pro-transgender voices dominate the conversation, and listeners fail to account for the informational value of silence from participants with anti-transgender attitudes.
%}








%The evidence strongly supports norm-based persuasion as the primary mechanism: pro-transgender participants speak up more during discussions, explicitly advocating for transgender workers and signaling a strong anti-discriminatory norm that influences others' subsequent private choices. 


%\textcolor{red}{SOMETHING ABOUT PERSUSASION AND PERSUSAION RATE IN THE INTRO}

%\textcolor{red}{ADD EXPLICIT DESCRIPTION OF "SHORT RUN" -- explaining why the effects are so large}


%I first document that participants strongly favor transgender workers \textit{during} the discussions, and this appears to be driven by pro-trans participants speaking up more. Participants who privately select more transgender workers after discussions (a proxy for pro-trans private attitudes)\footnote{I did not include baseline measures of discrimination in order to minimize any priming or experimenter demand effects before eliciting the main discrimination outcome, so only post-discussion choices are used in this analysis.} are \input{../../outputs/stats/perc_spoke_first.tex} more likely to speak first and  \input{../../outputs/stats/perc_dominant.tex} more likely to dominate when discussing a choice that includes a transgender worker. Pro-transgender participants appear to speak up more because they want to persuade others not to discriminate—they report significantly higher desires to influence group members' choices compared to other participants.



%Second, I show that norm updates are sufficient to reduce discrimination. I use a treatment arm in which some ``observers'' are not involved in a discussion, but are told the first-round choices of their group members. This leads to a small increase (\textcolor{red}{4.5pp}) in the perceived norm within their group towards their group selecting transgender workers in private, and in turn leads to a corresponding 1-1 reduction in observers' private second-round choices. This suggests that simply updating norms can reduce discrimination.


%Discussions generate much larger norm updates than simply observing others' choices because they create a misperception of pro-transgender norms. Participants initially underestimate support for transgender workers, believing discrimination is more common than it actually is. Discussions not only correct this misperception but flip it, creating an overestimation of pro-transgender sentiment. This suggests that participants do not sufficiently account for the fact that pro-trans participants are more likely to speak up in discussions. Consistent with this, there is evidence of ``silence neglect'': participants appear to update their beliefs when hearing pro-transgender statements but may not fully account for silence that could indicate opposition. Specifically, participants who hear no pro-transgender statements during discussions show no change in perceived norms or behavior, confirming that silence carries little informational weight. Broadly, this mechanism is thus consistent with a theory of ``spiral of silence'', where selective communication leads people to perceive a view as more prevalent than it truly is \citep{noelle-neumannSpiralSilenceTheory1974, huangBreakingSpiralSilence2023}.


%Finally, I unpack the black box of the discussion by analyzing transcripts, showing that persuasion is partly driven by \textit{what people say}; the discussions' effects are not driven solely through observing others' choices. Participants frequently invoke moral concepts when discussing transgender workers, using language about "opportunities," "rights," and "equality." These moral arguments predict both larger changes in perceived norms and greater reductions in discrimination, even after controlling for within-discussion choices. These findings align with research on moral reframing, which shows that arguments framed in moral terms are especially persuasive when they resonate with listeners' values \citep{voelkelMorallyReframedArguments2018, feinbergMoralReframingTechnique2019}.



%\textcolor{red}{DESCRIBE HOW ALL THESE PHENOMNEA ARE POTENTAILLY EXPLAINED BY PRESCRIPTIVE / DESCRIPTIVE WEDGE??? -- prep the audience for this in the intro bits above}

%The effectiveness of horizontal communication in this setting hinges on a crucial contextual feature: the disconnect between descriptive norms (widespread discrimination) and prescriptive norms (overwhelming agreement that discrimination is wrong). Control group participants strongly discriminated in their choices, yet 93\% stated that discrimination was wrong when responding to vignettes. This disconnect creates conditions where pro-transgender participants readily speak up, facing little social cost while potentially influencing others. It also makes perceived descriptive norms highly elastic, rapidly shifting to align with expressed prescriptive norms and reducing subsequent private discrimination. This framework helps identify other promising contexts for horizontal communication interventions.




%\textcolor{red}{UP TO HERE}

Why does horizontal communication reduce discrimination here, even though it can clearly promote it in other settings (e.g., \citealp{myersDiscussionEffectsRacial1970, satyanathBowlingFascismSocial2017})? While I cannot definitively identify which contextual features are essential in the absence of cross-context comparisons, the mechanism results suggest one crucial feature: the stark disconnect between the descriptive norm (widespread discriminatory behavior) and the prescriptive norm (widespread agreement that discrimination is wrong). Control group participants discriminated substantially, yet 93\% described discrimination as wrong in response to vignettes. This disconnect can plausibly account for the three sources of asymmetry that could explain the shift away from discrimination: (i) pro-trans participants speaking up more because there is minimal social harm from advocating for transgender workers, and more to gain from persuading others; (ii) participants more readily updating their perception of the descriptive norm in favor of pro-trans behavior to align more closely with the prescriptive norm; and (iii) participants being particularly responsive to morally framed arguments in favor of transgender workers because they align with their existing perceptions of what is ``right''. These suggestive patterns, though not conclusive, point to promising contexts for horizontal communication interventions where similar norm disconnects exist. They simultaneously indicate that horizontal communication is not a silver bullet, and may be actively harmful where people subscribe to the view that discrimination is morally correct.







%While I cannot evaluate how these features vary across settings, one key contextual feature that appears to be important for tying these mechanisms together and for understanding why horizontal communication is so helpful in this context, despite clearly promoting discrimination in other contexts (\textcolor{red}{CITE}), is that there appears to be a large disconnect between the descriptive norm (how much people actually discriminate) and the prescriptive norm (the generally accepted view about whether discrimination is right). In the study control group, despite strong average discrimination in choices, 93\% said that discrimination was wrong in response to two vignettes describing discriminatory scenarios. This appears to be key for explaining why pro-trans participants speak up more: there are minimal social costs to advocating for transgender workers, and more to gain from attempting to persuade others. And it may also explain why participants' perception of the descriptive norm within their group is so elastic, shifting rapidly in a way that also reduces subsequent private discrimination. These insights help us understand in what other settings might be promising for horizontal communication.


%In line with with theories of the "spiral of silence" \citep{noelle-neumannSpiralSilenceTheory1974, huangBreakingSpiralSilence2023}, where selective communication leads people to perceive a view as more prevalent than it truly is.


% This results from "silence neglect"—participants update their beliefs when hearing pro-transgender statements but fail to account for silence that might indicate opposition. Consequently, when pro-transgender participants speak up more frequently, the group develops an exaggerated perception of pro-transgender norms. Participants who hear no pro-transgender statements during discussions show no change in perceived norms or behavior, confirming that silence carries little informational weight.

%Third, to explain why the discussion generates large changes perceived norms, instead of just reinforcing existing behaviour, I explain why the discussion appears to generate a much larger norm-update than simply observing others' private choices. Participants start off with a misperception, believing that discrimination is more common than it really is in the control group. The discussion not only corrects this misperception, but actually generates a misperception of a pro-trans norm. This can be explained by “silence neglect”, in line with theories of spirla of silence. I find evidence consistent with such silence neglect (people do not update from discussions that don’t include pro-trans statements, but update in favor of pro-trans norms). Combined with the asymmetry in pro-trans people’s willingness to speak up, this can explain the large reductions in discrimination. 

%Finally, I unpack the black box of the discussion using the discussion transcripts to show that the content of the discussion matters beyond simply the observation of others’ choices; showing that participants invoke morality by saying things like “we have to give them opportunities” and stuff, and show that these moral discussions are associated with both larger changes in perceived norms and in turn with larger reductions in discrimination.















%Silent listeners in two-person discussions reduce their discrimination by \input{../../outputs/stats/effect_listener.tex} percentage points (p<0.001) despite never speaking themselves—an effect statistically indistinguishable from active discussants. This effect persists in private choices 2-9 weeks later, suggesting durable attitude change rather than temporary compliance. The persuasion mechanism operates through exposure to pro-transgender statements: each additional pro-transgender comment heard from group members is associated with a \input{../../outputs/stats/het_by_posneg_discussion_full_coeff.tex} percentage point reduction in post-discussion discrimination. These findings point to interpersonal influence rather than self-persuasion as the primary mechanism. 

 %
% The treatment effect on these ``listeners" is large (13 p.p., $p$$<$0.001), and is not significantly different from participants who actively participate in a discussion. Since the listener was silent, they changed their behavior solely based on the choices and justifications they heard from others in the discussion.


% more likely to speak first (by \input{../../outputs/stats/coeff_spoke_first_trans.tex} percentage points) and dominate conversations (by \input{../../outputs/stats/coeff_dominant.tex} percentage points) when discussing transgender candidates.

%Each additional transgender worker chosen in the post-discussion choices (a proxy for pro-trans private attitudes)\footnote{I did not include baseline measures of discrimination in order to minimize any priming or experimenter demand effects before eliciting the main discrimination outcome, so only post-discussion choices are used in this analysis.}  is associated with a \input{../../outputs/stats/perc_spoke_first.tex} higher probability of speaking first in the discussion ($p$=\input{../../outputs/stats/spoke_first_p_val.tex}) and a \input{../../outputs/stats/perc_dominant.tex} higher probability of dominating the discussion ($p$=\input{../../outputs/stats/dominant_p_val.tex}), but \textit{only} when discussing a choice that includes a transgender worker.








%
%
%\textcolor{red}{---------}
%
%
%\textcolor{red}{MECHANISMS OLD VERSION}
%
%I then seek to understand the mechanisms behind the effects, focusing on explaining the large impacts of the horizontal communication. Why does generating communication between privately discriminatory individuals sharply reduce post-communication discrimination? I show evidence against two candidate channels: (i) \textit{correcting a misperceived norm}, and (ii) \textit{virtue signaling}. Instead, the results appear to be driven by (iii) \textit{persuasion}.  Participants persuade each other to discriminate less, at least partly because pro-trans participants are the most vocal in the discussion, and because these participants make use of persuasive moral arguments based on equality and the need to redress societal discrimination.
%
%First, I show evidence against the effects being driven by the process of \textit{correcting a misperceived norm} \citep{bursztynMisperceivedSocialNorms2020}. If participants initially overestimated how discriminatory their peers were, and the discussion corrected this misperception, then participants might have felt more comfortable choosing a transgender worker after the discussion. However, this pattern is not sufficient to explain the large treatment effects. In incentivized predictions of their fellow group members' private choices, participants in the control group do overestimate the extent of discrimination by 5 p.p. ($p$\input{../../outputs/stats/pval_misper_control.tex}, as measured by the predicted probability of selecting a transgender worker). But the reduction in predicted discrimination of 24 p.p. ($p$$<$0.001) generated by the discussion is far larger than the initial misperception. Even under generous assumptions, a precisely corrected misperception could therefore only account for up to \input{../../outputs/stats/misperc_accounting.tex} (95\% CI: [\input{../../outputs/stats/misper_accounting_lower.tex}, \input{../../outputs/stats/misper_accounting_upper.tex}]) of the discussion's treatment effect.
%
%Second, I rule out that the discussion's effects are driven by a simple \textit{virtue signaling} channel. If participants have social image concerns and do not want to \textit{appear} discriminatory in a group setting, they may act more favorably towards transgender persons when making decisions that are visible to the rest of their group  \citep{benabouIncentivesProsocialBehavior2006,bursztynSocialImageEconomic2017a}. This could explain the discussion's effects if social image concerns encourage pro-trans behavior during the discussion that in turn persuades others to be more pro-trans. I test this channel using a \textit{No discussion (public)} treatment arm in which participants do not discuss with each other, but instead make individual hiring choices that they know will later be revealed to other members of their group. %If virtue signaling drives pro-trans behavior even in the absence of communication, this exogenous increase in social image concerns should reduce discrimination. 
%Empirically, however, this exogenous increase in social image concerns has no effect on average discrimination ($p$=\input{../../outputs/stats/p_val_r1_public.tex}), suggesting that virtue signaling alone cannot explain the effect of the discussion.
%
%
% 
% Third, I show evidence in favor of a \textit{persuasion} channel using a treatment arm in which one participant silently listens to two other people engaging in a discussion. 
% %
% The treatment effect on these ``listeners" is large (13 p.p., $p$$<$0.001), and is not significantly different from participants who actively participate in a discussion. Since the listener was silent, they changed their behavior solely based on the choices and justifications they heard from others in the discussion. 
% Moreover, the effects on listeners are highly persistent in the 2-9 week follow-up ($\beta$=\input{../../outputs/stats/coeff_listener_fu.tex} p.p., $p$\input{../../outputs/stats/p_val_listener_fu.tex}), and are also strong when examining a robustness outcome that is designed to be completely private (i.e., unobservable by neighbors). Together this is evidence that listeners are being persuaded to discriminate less by other participants.\footnote{The implied \textit{persuasion rate} of \input{../../outputs/stats/persuasion_rate.tex} is high relative to values seen in the literature \citep{dellavignaPersuasionEmpiricalEvidence2010}.}
% 
%% Moreover, the effects on listeners are highly persistent and apply when they are behaving in total privacy, providing further evidence in favor of persuasion. 
% 
% 
% 
% 
% 
%% In the follow-up (when group members are very unlikely to be present) they are \input{../../outputs/stats/coeff_listener_fu.tex} p.p. more likely to select a transgender worker  ($p$\input{../../outputs/stats/p_val_listener_fu.tex}). And the discussion reduces listeners' discrimination even on an outcome that is completely private (i.e., unobservable by neighbors).
%
% 
%% , both of which increase the probability of selecting a transgender worker by approximately  (). This suggests that the effects are driven by persuasion \textit{between} participants, rather than by participants persuading themselves or wanting to be consistent with their earlier actions. 
% 
%% \textcolor{red}{Add persuasion rates? Add other suggestive evidence for persuasion - especially the LONG RUN persuasion, cite DellaVigna et al}
% 
%% {\color{red}ADD STUFF ON EXTRA PRIVATE OUTCOME TOO??}
% 
%
%Why do discriminatory participants persuade each other to be more pro-trans? I document evidence that this is partly because the people who are most pro-trans are most vocal in the discussions. Each additional transgender worker chosen in the post-discussion choices (a proxy for pro-trans private attitudes)\footnote{I did not include baseline measures of discrimination in order to minimize any priming or experimenter demand effects before eliciting the main discrimination outcome, so only post-discussion choices are used in this analysis.}  is associated with a \input{../../outputs/stats/perc_spoke_first.tex} higher probability of speaking first in the discussion ($p$=\input{../../outputs/stats/spoke_first_p_val.tex}) and a \input{../../outputs/stats/perc_dominant.tex} higher probability of dominating the discussion ($p$=\input{../../outputs/stats/dominant_p_val.tex}), but \textit{only} when discussing a choice that includes a transgender worker. In line with this, the overall pattern of  communication during the discussions is highly pro-trans (for example, statements about transgender workers were \input{../../outputs/stats/ratio_pos_neg_mentions.tex}x more likely to say something positive than to say something negative). I develop a model to understand the conditions under which pro-trans participants would speak out more and reduce overall discrimination.\footnote{The model combines the idea from social signaling theory that people want to fit in with their group \citep{benabouIncentivesProsocialBehavior2006}, but also allows participants to persuade each other (i.e., directly affect each other's preferences by sending persuasive messages). Under the right conditions, only pro-trans participants send persuasive pro-trans messages. 
%This occurs in a ``sweet spot'' range when discrimination is on average not too strong (otherwise no-one is even privately pro-trans) and not too weak (otherwise pro-trans participants do not have an incentive to persuade others; they are already in the majority).} I also point to the role of \textit{moral} or \textit{pro-social} arguments in explaining why participants are persuaded to discriminate less. In discussions about transgender workers, participants are particularly likely to use language about \textit{giving opportunities}, \textit{equality}, and \textit{rights}. Listeners who hear this type of reasoning discriminate less ($p$$<$$0.001$), suggesting that such moral argumentation might be particularly persuasive.
%
% 



%I show that in a ``sweet spot'' range





 
 
% participants' choices and communication regarding transgender workers \textit{during} the discussion are highly pro-trans (for example, they were \input{../../outputs/stats/ratio_pos_neg_mentions.tex}x more likely to say something positive about transgender workers than to say something negative).

%Motivated by this evidence, I present a model that  describes the conditions under which pro-trans people would be more vocal in discussions, and how this could explain the large reductions in post-discussion discrimination. In the model, participants want to fit in with their group members: they can do so either by conforming to their group's preferences, or by changing their group members' preferences to match their own.\footnote{The data provide evidence for this conformity motive: in the \textit{No discussion (public)} condition, the intragroup correlation in participants' choices is higher ($p$$=$\input{../../outputs/stats/ri_p_icc_private_public.tex}). This suggests that participants try to match each other's preferences in a group setting. This differs from a virtue-signalling motive, which I conceptualize as wanting to signal that one is not discriminatory (regardless of the preferences of others in one's group).}  
%Because people who want to take a pro-trans action know that they will deviate further from the discriminatory preferences of their group, they have a stronger incentive to persuade others to be more pro-trans, thus resulting in more pro-trans communication. The model shows that there can be a ``sweet-spot'' range, in which average preferences are discriminatory, but not \textit{too} discriminatory, where \textit{only} pro-trans participants try to persuade others, resulting in a reduction in post-discussion discrimination.  Intuitively, average preferences must be discriminatory so that pro-trans people have a greater incentive to persuade others. They cannot be too discriminatory, otherwise no-one will take a pro-trans action at all, undermining the incentive to persuade others. Conversely, if average preferences are not discriminatory enough, anti-trans participants will speak up too, possibly making the discussion harmful overall. This provides a framework for thinking about the necessary conditions for horizontal communication to reduce discrimination. 
%Because people who are pro-trans at baseline know they are further from the original discriminatory group norm, they have a stronger incentive to persuade others to be more pro-trans, thus resulting in more pro-trans communication. The model shows that there can be a ``sweet-spot'' range, in which average attitudes are discriminatory, but not \textit{too} discriminatory, where \textit{only} pro-trans participants try to persuade others, resulting in a reduction in post-discussion discrimination. 





%Attitudes have to be discriminatory, in order that pro-trans people have a greater incentive to persuade others. But they cannot be too discriminatory, otherwise no-one will have an incentive to take a pro-trans action at all. Conversely, if attitudes are not discriminatory enough, then anti-trans participants will have an incentive to speak up too, possibly making the discussion harmful on net.




%{\color{red}WHY IS THERE A SWEET SPOT RANGE?? intuition, and when does communication help / harm}
% 
% discussion is pro-trans when:
% 
% - discriminatory attitudes, but not too discriminatory
% 
% - (pro-trans vocal minority)
% 
% - presence of persuasive narratives...
% 
% - exogenous decrease in c?
% 
% - when group attitudes are better, anti-trans have more incentive to speak up
% 
% - virtue signaling can make it feasible even when private discrimination is more severe
 
 
 
 
%  and (ii)
 
% A third channel that could explain the effects of the discussion and the emergence of a pro-trans norm is that (i) people persuade each other with the narratives and justifications they share during the discussion, and (ii) persuasive communication is predominantly in favor of transgender workers, because the pro-trans participants are more vocal in the discussion.

%Second, I show that the discussion leads to the \textit{emergence} of a pro-trans group norm. To measure such group norms, participants make incentivized predictions of whether others will hire transgender workers in private. The probability that a participant predicts that another group member privately selected a transgender worker is  \input{../../outputs/stats/effect_norms_group.tex} p.p. higher after a group discussion compared to the control condition ($p$\input{../../outputs/stats/p_val_group_norms.tex}).\footnote{%
%The discussion leads to small shifts in predictions about the wider community, while the information about legal rights also shifts both types of norms in favor of selecting transgender workers.
%} Participants do not simply become less discriminatory, they also correctly anticipate that others in their group have become less discriminatory. This shift in group norms appears to be a key mediator of the large reduction in discrimination driven by the discussion.


%that both treatments lead to significant changes in participants' perceptions about norms of behavior towards transgender people. To measure this, I ask participants to make incentivized predictions of whether others will hire transgender workers. For predictions about the norm \textit{within} the participant's group, the group discussion leads to large shifts in favor of transgender workers. {\color{red}The probability that a participant predicts that a fellow group member \textit{privately} selected a transgender worker after the discussion is  than for participants who did not take part in a discussion ($p$\input{../../outputs/stats/p_val_group_norms.tex}).} The emergence of a within-group norm in favor of transgender workers thus appears to be a key mediator of the large reduction in discrimination driven by the discussion.

%-------------------------------

%In line with the emergence of a pro-trans norm in the discussion, people behave positively towards transgender workers \textit{during} the discussion. For example, participants are \input{../../outputs/stats/effect_discussion_r1.tex} percentage points ($p$$<$0.001) more likely to select a transgender worker in the collective choices during the discussion relative to the private choices made by the control group. Participants also bias their communication in favor of transgender workers. They are \input{../../outputs/stats/ratio_pos_neg_mentions.tex}x more likely to say something positive about a transgender worker than to say something negative about them in the discussion, and they are more likely to use pro-social reasoning when presented with an option involving a transgender worker.

%And participants are significantly more likely to make positive statements about transgender workers and use pro-social reasoning for choosing them compared to non-transgender workers. 



%To further explore whether the pro-trans norm emerges because participants \textit{persuade} each other to discriminate less through their pro-transgender choices and statements, I add a treatment arm in which one participant silently listens to two other people who have a discussion. The treatment effect on these ``listeners'' is just as large as on participants who actively participate in the 2-person discussion, increasing the probability of selecting a transgender worker by \input{../../outputs/stats/effect_listener.tex} p.p. ($p$$<$0.001). This suggests that the effects are driven by persuasion \textit{between} participants, rather than by participants persuading themselves or wanting to be consistent with their earlier actions.%\footnote{Results in behavioral economics have shown that self-persuasion and self-consistency motives can drive behavior in other contexts \citep{falkConsistencySignalSkills2017, schwardmannSelfPersuasionEvidenceField2022}.} 

%To explain why participants are persuaded to be more pro-trans (rather than, e.g., more anti-trans), I document suggestive evidence that the people who are most pro-trans are most vocal in the discussions. In particular, I show that each additional transgender worker chosen in the post-discussion choices (a proxy for pro-trans private attitudes)\footnote{I did not include baseline measures of discrimination in order to minimize any priming or experimenter demand effects before eliciting the main discrimination outcome, so only post-discussion choices are used in this analysis.}  is associated with a \input{../../outputs/stats/perc_spoke_first.tex} higher probability of speaking first in the discussion and a \input{../../outputs/stats/perc_dominant.tex} higher probability of dominating the discussion, but \textit{only} when discussing a choice that includes a transgender worker.

%Motivated by this evidence, I develop a model that  describes the conditions under which pro-trans people would be more vocal in discussions, and how this can explain the large reductions in post-discussion discrimination. In the model, participants want to fit in with their group members:\footnote{The data provide evidence for this conformity motive: in the \textit{No discussion (public)} condition, the intragroup correlation in participants' choices is higher ($p$$=$\input{../../outputs/stats/ri_p_icc_private_public.tex}). This suggests that participants try to match each other's preferences in a group setting.} they can do so either by conforming to the existing group norm, or by changing the group norm to match their own attitudes.  Because people who are pro-trans at baseline know they are further from the original discriminatory group norm, they have a stronger incentive to persuade others to be more pro-trans, thus resulting in more pro-trans communication. The model shows that there can be a ``sweet-spot'' range, in which average attitudes are discriminatory, but not \textit{too} discriminatory, where \textit{only} pro-trans participants try to persuade others, resulting in a reduction in post-discussion discrimination.


%{\color{red}ATTITUDES AND BELIEFS IS MISSING NOW???}



%\textcolor{red}{REMOVE}:
%Finally, I provide evidence against a number of alternative explanations of the results, including (i) other characteristics of the photos of workers, such as perceived caste; (ii) social image concerns that affect even private, post-discussion choices; (iii) increased contemplation about choices; (iv) experimenter demand effects or social desirability bias; (v) increased salience of the notion of transgender; and (vi) cheap talk due to low stakes.

%Finally, I provide evidence relating to a number of alternative explanations of the results. First, while participants in a discussion may be thinking more \textit{carefully} about their choices (as indicated by longer post-discussion response times), it is not clear that this drives lower discrimination, since longer response times are not correlated with less post-discussion discrimination ($p$=\input{../../outputs/stats/p_val_corr_duration_choice.tex}).
% Second, to address the concern that even participants' \textit{private} post-discussion choices are affected by social image concerns (e.g., because neighbors can observe grocery deliveries at home), I show that the discussion also increases the likelihood of selecting a transgender worker by \input{../../outputs/stats/anon_effect.tex} percentage points ($p$\input{../../outputs/stats/anon_effect_p.tex}) for a set of hiring choices which is unobservable by neighbors. Third, I address concerns of experimenter demand effects, social desirability bias, and salience by showing that the treatment effects are not driven by (i) participants who correctly guess the purpose of the experiment, (ii) participants with a high social desirability score at baseline, or (iii) participants for whom the word ``transgender'' is salient.









		
	
	
%	- behavior during discussion
	
%	- Effect of listening to a discussion
	
%	- Social image concerns
	
	
%	- Other mechs
	
	
	
	
	
	
	
	\begin{comment}
		
	
	\subsection{Notes}
	
	
	
	
	\textbf{Mechanisms}:
	
	To understand the mechanisms, ... Both the group discussion and the videos about the legal rights of transgender people lead to significant updates on perceived norms of behavior, but a more negligible effect on private attitudes. I ask participants to make incentivized predictions of how many others (in their group and community) select transgender workers, and show that this prediction increases significantly in both types of treatment groups. (\textbf{Suggests that norms of behavior are likely an important mediator; but endogenous communication more drastically alters perceived norms...})
	
	
	
	To further understand the drivers behind the effect of the discussion, ....
	
	First document characteristics of behavior during the discussion. Show that people are super pro-trans
	
	Then, I use two additional mechanism treatments that were included in the randomization to explore mechanisms.
	
	I show that just silently listening to a discussion also leads to large reduction in discrimination. [what does this imply]
	
	Social image concerns - cannot be this alone that drives effect of the discussion

	

	
%	The group discussion involved three neighbors, who made 4 collective hiring choices for deliveries and discussed the options in detail. Enumerators never mentioned transgender people themselves in this discussion; all discussion about transgender people came from the participants themselves. 	
	
	
	
	\textbf{-----------}
	
	
	
	
	
	
%	\citep{myersDiscussionEffectsRacial1970, rossPersonSituationPerspectives2011, kallaReducingExclusionaryAttitudes2020, bursztynExtremeMainstreamErosion2020}. 
	
	
Typically, discrimination has been analyzed as an individual's decision and has been attributed to deep-seated preferences against interacting with a minority \citep{beckerEconomicsDiscrimination1957} or beliefs about minorities' performance that only change when new information is revealed \citep{beckerEconomicsDiscrimination1957,phelpsStatisticalTheoryRacism1972,arrowHigherEducationFilter1973,aignerStatisticalTheoriesDiscrimination1977}. But discriminatory behavior may be more influenced by people's social context than these theories suggest \citep{myersDiscussionEffectsRacial1970, rossPersonSituationPerspectives2011, kallaReducingExclusionaryAttitudes2020, bursztynExtremeMainstreamErosion2020}. In particular, people's behavior towards minorities may be different when they act in \textit{groups} instead of individually. 

In this paper, I examine whether majority-group members involved in a group discussion can \textbf{persuade each other} to reduce discrimination against a minority in a real-stakes hiring decision. Groups may discriminate less because even when discrimination is common, it can be socially unacceptable to discriminate, implying that people in groups may bias their communication in favor of a minority in order to not be perceived as a discriminator. In-so-doing, they may persuade each other to discriminate less than they would have done individually.\footnote{The idea I explore in this study thus mirrors the logic of political correctness in other settings \citep{morrisPoliticalCorrectness2001, braghieriPoliticalCorrectnessSocial2021, golmanAcceptableDiscourseSocial2022}. If it is socially unacceptable to be openly racist or sexist, then people may generate more favorable narratives about ethnic minorities and women, thus leading to an equilibrium improvement in private attitudes towards these minorities. This links to a large literature suggesting that social image concerns -- concerns about how one is perceived by others -- are a key driver of behavior in a wide variety of economic domains \citep{benabouIncentivesProsocialBehavior2006, dellavignaVotingTellOthers2017, bursztynSocialImageEconomic2017, bursztynHowDoesPeer2015, bursztynExtremeMainstreamErosion2020, bursztynJustifyingDissent2023, karingSocialSignalingChildhood2021, bursztynStatusGoodsExperimental2018, bursztynActingWifeMarriage2017}.}




	
	

	
	
	
	\textbf{-----------------------}
	
		

	Such discrimination is often attributed to \textit{prejudice}, that is, a deep-seated preference against interacting with a minority \citep{beckerEconomicsDiscrimination1957}. Other theories focus on \textit{beliefs} about minorities' ability or reliability that only change when new information is revealed \citep{phelpsStatisticalTheoryRacism1972,arrowHigherEducationFilter1973,aignerStatisticalTheoriesDiscrimination1977}. But discriminatory behavior may be more influenced by people's social context than these theories suggest \citep{myersDiscussionEffectsRacial1970, rossPersonSituationPerspectives2011, kallaReducingExclusionaryAttitudes2020, bursztynExtremeMainstreamErosion2020}. 
	
	
	
	In particular, people's behavior towards minorities may be different when they act in \textit{groups} instead of individually. For example, even when discrimination is common, it can be socially unacceptable to discriminate, implying that people in groups may bias their communication in favor of a minority in order to not be perceived as a discriminator. In-so-doing, they may persuade each other to discriminate less than they would have done individually.\footnote{The idea I explore in this study thus mirrors the logic of political correctness in other settings \citep{morrisPoliticalCorrectness2001, braghieriPoliticalCorrectnessSocial2021, golmanAcceptableDiscourseSocial2022}. If it is socially unacceptable to be openly racist or sexist, then people may generate more favorable narratives about ethnic minorities and women, thus leading to an equilibrium improvement in private attitudes towards these minorities. This links to a large literature suggesting that social image concerns -- concerns about how one is perceived by others -- are a key driver of behavior in a wide variety of economic domains \citep{benabouIncentivesProsocialBehavior2006, dellavignaVotingTellOthers2017, bursztynSocialImageEconomic2017, bursztynHowDoesPeer2015, bursztynExtremeMainstreamErosion2020, bursztynJustifyingDissent2023, karingSocialSignalingChildhood2021, bursztynStatusGoodsExperimental2018, bursztynActingWifeMarriage2017}.} 
	
	
%	The idea I explore in this study thus mirrors the logic of political correctness in other settings \citep{morrisPoliticalCorrectness2001, braghieriPoliticalCorrectnessSocial2021, golmanAcceptableDiscourseSocial2022}. If it is socially unacceptable to be openly racist or sexist, then people may generate more favorable narratives about ethnic minorities and women, thus leading to an equilibrium improvement in private attitudes towards these minorities.\footnote{}
	
	In this paper, I  investigate whether involving majority-group members in a group discussion can reduce discrimination against a minority in a real-stakes hiring decision. 
	
	
	
	
	I focus on discrimination towards 
	
	
	LGBTQ+
	
	Even though such  discrimination may have significant costs \citep{badgett2014economic, badgettRelationshipLGBTInclusion2019}, very little research in LMICs has examined its effects and causes (\citealp{badgettLGBTQEconomics2021}).
	
	

	
	I test whether this group discussion can reduce discrimination \textit{after} the discussion takes place, when participants make individual, private hiring choices.
	
	
	
	
	
	
	
	
	a field experiment ($N=\input{../../outputs/stats/total_n.tex}$)
	
	When group members communicate with each other about minorities, they may change each other's private attitudes by sharing pro- or anti-minority narratives; they may share information that changes beliefs; or they may reinforce discriminatory or pro-social norms.
	
	
	There appears, therefore, to be a wedge between the descriptive norm (how much do people actually discriminate?), and the prescriptive norm (to what extent do people think it is right or wrong to discriminate?).\footnote{\citet{bicchieriNormsWildHow2017} gives a more precise definition of descriptive and prescriptive norms, along with other examples of when they can diverge. Crucially, the definitions incorporate \textit{second-order beliefs}, i.e., beliefs about what other people's beliefs and attitudes are.}

	
%	\textit{SHOULD VS DO discriminate...}
	
	
	
	
%	I study this question in the context of discrimination against the \textit{hijra} community, a community largely composed of transgender women who are {\color{red}widespread} across South Asia. This community is highly visible and visually recognizable, making them subject to extensive economic discrimination and violence ({\color{red}REFS}). But while discrimination is prevalent, it also appears to be socially unacceptable to explicitly discriminate. Despite the substantial discrimination in the control group, \input{../../outputs/stats/prop_attitude_control.tex} of that same control group {\color{red}say that discrimination is unacceptable when they are given a vignette about discrimination against transgender people.} {\color{red}There appears to be a wedge between the descriptive norm of behavior, and the prescriptive norm.}
	
	
	This raises the possibility that when in a group context, people might bias their communication in favor of transgender people in order to not be perceived as a discriminator. In-so-doing, they may persuade each other to discriminate less than they would have done individually. The idea I explore in this study thus mirrors the logic of political correctness in other settings \citep{morrisPoliticalCorrectness2001, braghieriPoliticalCorrectnessSocial2021, golmanAcceptableDiscourseSocial2022}. If it is socially unacceptable to be openly racist or sexist, then people may generate more favorable narratives about ethnic minorities and women, thus leading to an equilibrium improvement in private attitudes towards these minorities.\footnote{This links to a large literature suggesting that social image concerns -- concerns about how one is perceived by others -- are a key driver of behavior in a wide variety of economic domains \citep{benabouIncentivesProsocialBehavior2006, dellavignaVotingTellOthers2017, bursztynSocialImageEconomic2017, bursztynHowDoesPeer2015, bursztynExtremeMainstreamErosion2020, bursztynJustifyingDissent2023, karingSocialSignalingChildhood2021, bursztynStatusGoodsExperimental2018, bursztynActingWifeMarriage2017}.}

	
	
	
	
	
	\textbf{----------------------}


In this paper, I investigate whether involving majority-group members in a group discussion can reduce discrimination against a minority in a real-stakes hiring decision. I test whether this group discussion can reduce discrimination \textit{after} the discussion takes place, when participants make individual, private hiring choices.




%whether it is possible to reduce discrimination in a real-stakes hiring decision against a stigmatized minority \textbf{by bringing majority-group members together in a small group and make a group hiring decision involving a minorit}y. I test whether this group discussion has the power to reduce discrimination \textit{after} the discussion takes place, when participants are asked to make individual, private hiring choices.	
In a field experiment ($N=\input{../../outputs/stats/total_n.tex}$) in Chennai, India, I measure discrimination against transgender delivery workers by offering participants a free grocery delivery, and then asking them to select both the items they will receive and the worker who will carry out the delivery. Participants make a series of 10 binary choices between two worker-and-item bundles, knowing that one of these choices will be randomly selected to be implemented. In the control arm, when all their choices were made individually and in private, there is significant discrimination: participants are \input{../../outputs/stats/baseline_discrim_round.tex} percentage points less likely to hire transgender workers than non-transgender workers. People's choices imply that they are willing to sacrifice grocery items worth \input{../../outputs/stats/wtp_control_hh_exp.tex}x the median daily food expenditure to avoid interacting with a transgender worker for 15 minutes.  By contrast, people who were earlier involved in a group discussion discriminate substantially less. The group discussion involved three neighbors, who made 4 collective hiring choices for deliveries and discussed the options in detail. Enumerators never mentioned transgender people themselves in this discussion; all discussion about transgender people came from the participants themselves. The effects of this discussion on discrimination are stark: in people's private, post-discussion hiring choices, anti-transgender discrimination is reduced to 0.


%When examining private post-discussion choices, discrimination is reduced by so much in this treatment arm that the estimate of anti-transgender discrimination is not significantly different from 0. 


% when making individual, private choices later on. Indeed discrimination is reduced by so much that the  


%But if participants are first involved in a 3-person group discussion and a group hiring decision with two of their neighbors, discrimination is sharply reduced in their individual, private choices later on. Indeed, discrimination is reduced by so much that the estimate of anti-transgender discrimination in this 3-person discussion arm is not significantly different from 0.

%Participants are involved in a 3-person group discussion with two of their neighbors, in which they are asked to make collective hiring choices and discuss the options at hand in detail. Enumerator who leads the discussion doesn't say anything about transgender people themselves; endogenous communication. 





 

%By contrast, participants who are first involved 




%I document significant economic discrimination against this transgender people in a control arm: people are \input{../../outputs/stats/baseline_discrim_round.tex} percentage points less likely to select a transgender worker than a non-transgender worker in a series of binary hiring choices. This corresponds to being willing to sacrifice \input{../../outputs/stats/wtp_control_hh_exp.tex}x the median daily food expenditure to avoid interacting with a transgender worker for 15 minutes. But if participants are first involved in a 3-person group discussion and a group hiring decision with two of their neighbors, discrimination is sharply reduced in their individual, private choices later on. Indeed, discrimination is reduced by so much that the estimate of anti-transgender discrimination in this 3-person discussion arm is not significantly different from 0.
	
	
	
	
%	{\color{red}Talk about the law?}
	
	The study focuses on discrimination against the community of \textit{hijra} or \textit{thirunangai} in South Asia, a community mostly composed of transgender women. This context is well suited to analyzing how discrimination might be affected by group dynamics. The community is visible and visually recognizable, making them vulnerable to extensive economic discrimination and violence 
	\citep{chakrapaniBarriersFreeAntiretroviral2011, sharmaChangingLandscapeSexual2014,  shivakumar2014markers, agoramoorthyLivingSocietalEdge2015,  malLetUsLive2015, ganjuStigmaViolenceHIV2017, malMolestationBengaliHijras2018, halliSuicidalityGenderMinorities2021, u.s.governmentstatedept.2021CountryReports2021}. But even though discrimination is prevalent, there appear to be norms that discourage such discrimination. Despite substantial hiring discrimination in the control group, 	\input{../../outputs/stats/prop_attitude_control_round.tex} of that same control group say that discrimination is unacceptable in response to a vignette that showcases explicit discrimination. There appears, therefore, to be a wedge between the descriptive norm (how much do people actually discriminate?), and the prescriptive norm (to what extent do people think it is right or wrong to discriminate?).\footnote{\citet{bicchieriNormsWildHow2017} gives a more precise definition of descriptive and prescriptive norms, along with other examples of when they can diverge. Crucially, the definitions incorporate \textit{second-order beliefs}, i.e., beliefs about what other people's beliefs and attitudes are.}

	
%	\textit{SHOULD VS DO discriminate...}
	
	
	
	
%	I study this question in the context of discrimination against the \textit{hijra} community, a community largely composed of transgender women who are {\color{red}widespread} across South Asia. This community is highly visible and visually recognizable, making them subject to extensive economic discrimination and violence ({\color{red}REFS}). But while discrimination is prevalent, it also appears to be socially unacceptable to explicitly discriminate. Despite the substantial discrimination in the control group, \input{../../outputs/stats/prop_attitude_control.tex} of that same control group {\color{red}say that discrimination is unacceptable when they are given a vignette about discrimination against transgender people.} {\color{red}There appears to be a wedge between the descriptive norm of behavior, and the prescriptive norm.}
	
	
	This raises the possibility that when in a group context, people might bias their communication in favor of transgender people in order to not be perceived as a discriminator. In-so-doing, they may persuade each other to discriminate less than they would have done individually. The idea I explore in this study thus mirrors the logic of political correctness in other settings \citep{morrisPoliticalCorrectness2001, braghieriPoliticalCorrectnessSocial2021, golmanAcceptableDiscourseSocial2022}. If it is socially unacceptable to be openly racist or sexist, then people may generate more favorable narratives about ethnic minorities and women, thus leading to an equilibrium improvement in private attitudes towards these minorities.\footnote{This links to a large literature suggesting that social image concerns -- concerns about how one is perceived by others -- are a key driver of behavior in a wide variety of economic domains \citep{benabouIncentivesProsocialBehavior2006, dellavignaVotingTellOthers2017, bursztynSocialImageEconomic2017, bursztynHowDoesPeer2015, bursztynExtremeMainstreamErosion2020, bursztynJustifyingDissent2023, karingSocialSignalingChildhood2021, bursztynStatusGoodsExperimental2018, bursztynActingWifeMarriage2017}.}
	
	
	In line with this logic, I document that participants communicate very positively about transgender people during group discussions, despite the level of private discrimination seen in the control group. During a discussion, participants are \input{../../outputs/stats/ratio_pos_neg_mentions.tex}x more likely to make a positive comment about a transgender worker than a negative comment. Participants are \input{../../outputs/stats/effect_discussion_r1.tex} percentage points more likely to select transgender workers when they make collective hiring choices than when they make such choices only individually. Moreover, when presented with options that involve a transgender worker (and particularly when those transgender workers are selected), participants are more likely to cite \textit{pro-social} reasons for their choices. They frequently say, for example, that they want to help the worker, or want to give the worker an opportunity.
	
%Why does discrimination reduce so much when one collects a group of people who are on average privately discriminatory and encourage them to communicate about a minority? \footnote{Importantly, the group discussion is designed so that participants are not explicitly led to discuss transgender people, no additional information about transgender people is revealed to them compared to the participants who do not participate in a discussion, and they receive no additional persuasive messaging. ({\color{red}Need to be careful about wording here given the law??})}

I develop a model to explore why the communication about transgender people is so positive, and why it persuades individuals to discriminate less afterwards, even when participants are privately discriminatory and they are not given any additional messaging or information about transgender people. 
%
%I develop a model to explore why discrimination is reduced so sharply when bringing together discriminatory participants and giving them an opportunity to discuss transgender people, 
%
In this model, participants' behavior during the discussion is affected by social image concerns, and endorsements of transgender and non-transgender workers during the discussion can be persuasive (i.e., change others' attitudes towards transgender people). Discrimination can be reduced when discriminatory individuals come together in a group for three reasons. First, if there is an unconditional prescriptive norm against discriminating, then even when participants believe that others discriminate, they may still face social pressure to endorse transgender people in the discussion and thereby persuade others to discriminate less. Second, if participants overestimate the prevalence of pro-trans attitudes in society (and this misperception remains uncorrected during the discussion), then they will also face social pressure to generate persuasive pro-trans endorsements in the discussion. And third, discrimination might be reduced if there is \textit{asymmetric persuasion}. Pro-trans endorsements might be more persuasive than anti-trans endorsements, for example, because pro-trans endorsements are framed in terms of moral reasoning (``I want to give them an opportunity''), whereas anti-trans endorsements are in terms of more practical matters (e.g., ``I prefer the grocery items for this option'').\footnote{Evidence from two lab studies suggests that pro-social narratives, i.e., those in favor of pro-social actions, can be more persuasive and change others' behavior more than anti-social narratives \citep{hillenbrandAsymmetricEffectNarratives2022, balafoutasMoralSuasionCharitable2022}.} If this is the case, then the asymmetry in persuasive power might be enough to outweigh the prevalence of discriminatory attitudes and lead to positive changes in attitudes. 
%
Using this model to guide the analysis, I explore evidence for different mechanisms underlying the results. I find suggestive evidence in favor of people being asymmetrically persuaded by pro-trans endorsements, and that there may be a weak unconditional prescriptive norm against discriminating that may drive people to discriminate less once their misperception about the level of discrimination in their group is corrected.\footnote{In a one-shot model, none of these factors is individually sufficient to generate the large treatment effects of the discussion. However, each of them is amplified in a dynamic, multi-period model which allows participants to react to each other and repeatedly send persuasive messages to each other. Thus, the mechanisms I identify may be sufficient to explain the large effects of the discussion, although other unmeasured mechanisms may also be present.}

%that describes conditions under which discrimination can be reduced when privately discriminatory individuals are brought together in a group.













%asking them to discuss transgender people. 



%\textbf{FRAMEWORK}??
	
%	\textbf{WITH THIS AS MOTIVATION}, I explore why discrimination reduces so much 
	
	
	
	
	
	

	
 

%{\color{red}\textbf{TALK ABOUT FRAMEWORK HERE???}}






%Despite the discrimination in the control group, participants who take part in a group discussion communicate very positively about transgender people. They are \input{../../outputs/stats/effect_discussion_r1.tex} percentage points more likely \textit{during the discussion} to select a transgender worker than participants who take part in an initial hiring round individually, and \input{../../outputs/stats/ratio_pos_neg_mentions.tex}x more likely to say something positive about a transgender worker than to say something negative about them. When seeing options that involve a transgender worker, and when selecting them, participants are significantly more likely to cite \textit{pro-social} reasons for selecting them, such as wanting to give the worker an opportunity or to help them, and 


%{\color{red}they are less likely to cite reasons concerning the grocery items in the options or the other characteristics of the workers.} {\color{red}This may generate a form of \textit{asymmetric persuasion}: the moral arguments for selecting a worker simply outweigh arguments based on grocery items or worker characteristics.} ({\color{red}Unfounded claim)}

%To guide my exploration of the mechanisms behind the discussion's effect, I describe a model that describes participants' behavior during and after the discussion. In this model, participants' behavior during the discussion is affected by social image concerns, and endorsements of transgender and non-transgender workers during the discussion can be persuasive (i.e., change others' attitudes towards transgender people). In this model, discrimination can be reduced when discriminatory individuals come together in a group for three reasons. First, if there is an unconditional prescriptive norm against discriminating, then even when participants believe that others discriminate, they may still face social pressure to endorse transgender people in the discussion and thereby persuade others to discriminate less. Second, if participants overestimate the prevalence of pro-trans attitudes in society (and this misperception remains uncorrected during the discussion), then they will also face social pressure to generate persuasive pro-trans endorsements in the discussion. And third, discrimination might be reduced if there is \textit{asymmetric persuasion} {\color{red}\textbf{REFS here}}. Pro-trans endorsements might be more persuasive than anti-trans endorsements, for example, because pro-trans endorsements are framed in terms of moral reasoning ("I want to give them an opportunity"), whereas anti-trans endorsements are in terms of more practical matters (e.g., "I prefer the grocery items for this option"). If this is the case, then the asymmetry in persuasive power might be enough to outweigh the prevalence of discriminatory attitudes and lead to positive changes in attitudes.


% there is \textit{}



%(i) there may be an unconditional prescriptive norm in favor of not discriminating, so that people in group settings discriminate less...; (ii) there may be persistent misperceptions about the norm.... or (iii) there may be asymmetric persuasion, in which pro-minority endorsements lead to larger changes in attitudes than anti-minority endorsements, for example because pro-minority endorsements. 

%\textit{FIRST - rule out persistent misperception as explanation????}

%Using this model to guide my exploration of the mechanisms, \textbf{and to validate it} I add two additional treatment arms that allow me to provide \textbf{suggestive evidence of the mechanisms driving the main effect of the discussion}.
I can rule out persistent misperceptions as a driver of the discussion effects. 
%Fourth, the discussion leads to a large shift in group norms. 
I ask participants to predict the private choices of others in their group after the discussion. Participants in the control group actually \textit{underestimate} the prevalence of pro-trans attitudes in their groups, suggesting that they should be biased towards being especially discriminatory in group scenarios. Moreover, the discussion not only corrects this misperception, but also leads to a large shift in group norms. In particular, there is a large increase in the predicted proportion of group-members who select transgender workers, approximately in line with the true increase in those selections. This shift in levels also suggests that the discussion's effects are not simply the result of correcting a misperception, in the style of, e.g., \citet{bursztynMisperceivedSocialNorms2020}.

I then use supplementary mechanism treatment arms to test the other predictions of the model and possible explanations for the discussion effects. 
%
%In addition, I show that in the control group, people start off with a misperception about their group: people overestimate the amount of discrimination in their group. 
 %
% The correction of this misperception may be part of what makes the discussion reduce discrimination. However, it is not a sufficient explanation alone, because (i) the \textit{No discussion (public)} arm also corrects the misperception but doesn't reduce discrimination, and (ii) the discussion arm does not just correct the misperception but also leads to a large level reduction in the \textit{true} and \textit{predicted} amount of discrimination. {\color{red}Possibly cite dynamic model here???}
%
%Social image arm
%- Does social image term have mean effect?
%- Does make people converge
%- People overestimate ($\delta$)
%- Implies that some countervailing force ...
First, I examine whether social image concerns lead participants to discriminate less when in a group.     To evaluate this mechanism, a supplementary treatment arm (\textit{No discussion, public}) varies the visibility of participants' choices in the first set of hiring choices. People make individual hiring choices, but they are told that their choices will be announced to the rest of their group later on. If participants have social image concerns, and don't want to appear to be discriminatory, then the public nature of their choices in this arm would lead them to discriminate less. This condition does have an impact on behavior: participants within a group-of-3 converge in terms of their likelihood of selecting a transgender worker. But it generates no detectable average treatment effects in the public choices.

The lack of an average treatment effect is actually evidence of an unconditional prescriptive norm against discriminating. I show that people want to match the choices of others in their group due to social image concerns, \textit{and} that they on average overestimate the prevalence of discriminatory behavior in their groups. Under a standard model of social image, this implies that participants should discriminate \textit{more} in the \textit{public} condition. The fact that they do not (indeed, the point estimate indicates that they discriminate slightly less) is evidence that there must be a prescriptive norm against discriminating that counterweighs the desire to match others in one's group. 

In the \textit{No discussion (public)} arm, I then vary whether the announcement of others' choices is made before or after a participant makes their individual choices. People who are told others' choices \textit{before} making their private selections discriminate slightly less, even though the choices they are told about are on average discriminatory. This is evidence of asymmetric persuasion, in which being told that another group member has selected a transgender worker changes people's private choices more than being told that another group member avoided a transgender worker. 

To understand whether people are persuaded by what they hear in the group discussion, I add a treatment arm in which 2 participants engage in a group discussion, while a 3rd silently listens to the discussion (the ``\textit{listener}''). Listening to a discussion by others leads to a reduction in discrimination that is just as large as taking part in the discussion themselves. This implies that active participation in the discussion is not essential to generate large treatment effects; the treatment effects do not result from a desire for self-consistency or self-persuasion.\footnote{Evidence from behavioral economics suggests that people may have a desire to be or appear consistent with previous actions \citep{falkConsistencySignalSkills2017}, or may persuade themselves to make their preferences align with their previous actions \citep{schwardmannSelfPersuasionEvidenceField2022}.} Instead, just hearing the choices, narratives and justifications given by others is sufficient to generate the large reductions in discrimination. Under an additional assumption, I can identify the causal effect on listeners of being exposed to a discussion in which a group selects a transgender worker. This estimate suggests large persuasive effects.





% in order to test the persuasive effect of being told that another group-member has selected a transgender worker. The persuasive impact of simply being told that another group-member has selected a transgender worker appears to be small. Together with the large persuasive effects on those who listen to a discussion, this suggests that hearing the narratives and justifications given by discussants, rather than just observing their choices, is a crucial driver of the discussion's effect.





% This suggests that the discussion has effects that go beyond purely making one's choices visible to others, again in line with the claim that the narratives surfaced during the discussion may be crucial for changing people's behavior {\color{red}(talk about other possibilities, e.g., dynamic model or social image for explanations / narratives)}


%Do participants truly persuade each other to discriminate less, or in the process of the discussion, do they persuade themselves? Evidence from behavioral economics suggests that people may have a desire to be or appear consistent with previous actions \citep{falkConsistencySignalSkills2017}, or may persuade themselves to make their preferences align with their previous actions \citep{schwardmannSelfPersuasionEvidenceField2022}. To evaluate whether participants' behavior after the discussion is simply driven by this form of \textit{self-consistency} motive or by \textit{self-persuasion}, or whether listening to a discussion is persuasive alone, I add one arm in which instead of taking part in a 3-person discussion, I have one person silently listen to two other people discussing their preferred hiring choices. Listening to a discussion by others leads to a reduction in discrimination that is just as large as taking part in the discussion themselves. This suggests that hearing the choices, narratives and justifications given by others is sufficient to generate the large reductions in discrimination; active participation in the discussion does not appear to be necessary.


%But why do discriminatory individuals act in such a positive way towards transgender people during the discussion itself? One possibility is that there is a social norm against discrimination, such that when one makes decisions in a setting that is visible to others, one faces social sanctions if one discriminates. To evaluate this \textit{social image} mechanism, a second supplementary arm (\textit{No discussion, public}) varies the visibility of participants' choices. People make individual hiring choices, but they are told that their choices will be announced to the rest of their group later on. If participants have social image concerns, and don't want to appear to be discriminatory, then the public nature of their choices in this arm would lead them to discriminate less. While this condition does have an impact on behavior (participants within a group-of-3 converge in terms of their likelihood of selecting a transgender worker), this generates \textbf{{\color{red}NO / SMALL}} average treatment effects. This suggests that the discussion has effects that go beyond purely making one's choices visible to others, again in line with the claim that the narratives surfaced during the discussion may be crucial for changing people's behavior {\color{red}(talk about other possibilities, e.g., dynamic model or social image for explanations / narratives)}













%\textbf{AFTER}

%To understand what might drive this pro-trans communication in the discussion, and how this communication changes people's subsequent private behavior, I use two additional treatment arms. Together, these treatment variations suggest a strong role for the persuasive pro-social narratives that are surfaced during the group discussion.



%{\color{red}[build up to social image - why is this important?]}



I also use a series of secondary outcomes to evaluate the mechanisms behind the effects of the group discussion. First, I show that the large reductions in discrimination also persist when using an outcome variable that is designed to be more robustly private than the main outcome. Participants make choices over a delivery option that they have to pick up instead of being delivered to their home (the concern being that the latter is visible to their neighbors). People who were involved in a group discussion discriminate significantly less on this outcome as well, suggesting that the treatment effects on the private, individual choices are not driven by social image concerns, but are driven by changes in either personal attitudes, personal norms, or decision heuristics. Second, I show that the results do not appear to be driven by changes in beliefs about the probability that the worker will carry out the delivery. Third, there are small changes in self-reported disapproval of discrimination, but null effects on a list-experiment measure of agreement with a discriminatory statement. The extent to which deep-seated personal attitudes are changed by the experiment thus remains unclear. Relatedly, I re-visit participants an average of \input{../../outputs/stats/follow_up_lag_mean_round.tex} days after the first survey and show that the 10-minute group discussion does not generate a persistent change in discriminatory behavior: there is no significant difference between treatment and control in the follow-up.




%{\color{red}FLOW HERE IS WEIRD}
	
To compare the effects of the endogenous communication that arises within groups to an exogenous shock to beliefs, attitudes, and norms, I evaluate the effect of informing individuals about the legal rights of transgender people. Building off the literature in law and economics that proposes that the law can change people's behavior by signaling a prevailing social norm \citep{sunsteinExpressiveFunctionLaw1996, benabouLawsNorms2011, mcadamsNormsLawEconomics2004}, I cross-randomize a subset of participants to be given information about the recent advances in the legal rights of transgender people. In line with the \textit{expressive law hypothesis}, participants who are told about these legal rights subsequently discriminate less against transgender people, and do so less even than participants who are given persuasive messaging expounding the view that transgender people \textit{should} have rights (but not explaining that they \textit{do}). The effects of explaining the law are only \input{../../outputs/stats/effect_law_vs_discussion.tex} as large as the effects of involving participants in a group discussion, suggesting that in this context, allowing endogenous communication to proceed is a much more effective means of reducing discrimination than trying to actively reduce discrimination ``from the outside''.


Given the substantial reductions in discrimination generated by the group discussion examined here, the results could have important policy implications. I show that promoting communication between in-group members can lead to large reductions in discrimination, and that even just listening to group discussions could generate similar effects. I also show that a simple information intervention about the legal rights of a minority can lead to modest reductions in discrimination, suggesting that campaigns that raise awareness of these legal rights may be a useful tool for reducing discrimination. The fact that allowing for endogenous communication appears to be more effective at reducing discrimination suggests that in the right circumstances, promoting open-minded, fair discussion of minorities may be more effective than trying to use top-down information interventions. This links to literature on ``deep-canvassing'' that shows that non-confrontational and co-operative discussions can be more effective persuasive tools than top-down persuasion \citep{mcraneyHowMindsChange2022, broockmanDurablyReducingTransphobia2016, kallaReducingExclusionaryAttitudes2020}. 

While group communication was effective at reducing discrimination in this case, there are other scenarios where endogenous group communication would likely \textit{increase} discrimination (e.g., \citealp{myersDiscussionEffectsRacial1970, mullerFANNINGFLAMESHATE2020, smithPowerTalkDeveloping2011}). 
I identify the following as potential ingredients for generating a mean reduction in discrimination in groups, that then persuades others to discriminate less individually: (i) a social norm that prohibits the expression of anti-minority sentiment and of explicit discrimination, and (ii) the asymmetric persuasive power of pro-social narratives in favor of a minority. A better understanding of when and where these conditions hold will permit identifying other scenarios where discrimination against other minorities can be reduced simply by promoting communication.

%While I am able to make some progress in understanding \textit{why} the endogenous communication about transgender people is so positive in this context, it will be important to evaluate these mechanisms in 

%where there already seems to be a social norm in place that leads to pro-minority discussion, 


















	
	
	
	
	
	
	
%	In many scenarios, it is socially unacceptable to discriminate or to express prejudice. People may therefore face social pressure to suppress any prejudiced views that they hold. For example, there is evidence that those with privately xenophobic attitudes are hold back from expressing them due to social image concerns (Braghieri 2021; Bursztyn, Egorov, Fiorin 2020; Bursztyn, Haaland, Rao, Roth 2023). This suggests that it may be possible to leverage social image concerns to reduce discrimination. One way of doing this is to encourage majority-group members to communicate with each other about minority groups. This might reduce discrimination, if people react to social pressure by biasing their communication about minorities in a positive way. This pro-minority communication may even end up changing people's private attitudes. On the other hand, if privately prejudiced individuals come together, they may reveal to each other that they are prejduced, and unravel any social norm that promoted pro-social behavior. It is therefore theoretically ambiguous whether encouraging people to communicate about a stigmatized minority affect the level of discriminaiton.
	
%	In many environments, discrimination and prejudice are socially unacceptable, leading individuals to suppress anti-minority views due to social image concerns \citep{braghieriPoliticalCorrectnessSocial2021, bursztynDisguisingPrejudicePopular2020, bursztynExtremeMainstreamErosion2020}. It may therefore be possible to these concerns to mitigate discrimination by encouraging majority-group members to communicate about minority groups. If expressing prejudice is socially unacceptable, discussions may have a pro-minority bias and lead people to mutually persuade each other to discriminate less. However, there is a risk that such communication could enable privately prejudiced individuals to confirm their biases and undermine pro-social norms. Thus, the theoretical impact of encouraging discourse about stigmatized minorities on discrimination levels remains ambiguous.

%	This study tests the effect of encouraging communication about a minority on discrimination towards that minority. I focus on discrimination towards the transgender community in India. The visibility of the transgender community in India means that such anti-trans discrimination is widespread, particularly in the areas of employment, education, and housing, as well as in instances of violence and blackmail \citep{badgett2014economic, chakrapaniStructuralViolenceKothiidentified2007, ukgovernmenthomeofficeCountryPolicyInformation2021, unesconewdelhiExperiencesBullyingSchools2018, u.s.governmentstatedept.USGovernmentState2019}. 
	
%	In a field experiment (N = 1,510) in Tamil Nadu, India, I show that bringing together three neighbors and involving them in a group discussion significantly reduces discrimination against transgender delivery workers. In the control condition, where participants are not involved in a discussion, participants are 18.3 percentage points less likely to select a delivery worker if they are transgender. This corresponds to a willingness to pay to avoid a transgender worker of approximately 110 Rs, equal to 1.7x the median daily food expenditure in the sample. By contrast, if participants are first involved in a group discussion, their later private choices suggest that they no longer discriminate against transgender workers at all. This effect appears to be partly driven by strong changes in group-level norms, and that this then filters into changes in people's private choices.
	
%	The results shown in Section \ref{sec_results} are preliminary. Results on secondary outcomes are not yet available.
	
	
%	\clearpage


	\end{comment}

%\subsection{Related literature}





%\textcolor{red}{CONTRIBUTIONS - NEW}

The key contribution of the paper is to show that discrimination can be rapidly reduced by generating horizontal communication about a minority within an existing social network. While many previous studies have shown that social contact between in-groups and out-groups can affect discrimination \citep{allportNaturePrejudice1954,pettigrewIntergroupContactTheory1998,boisjolyEmpathyAntipathyImpact2006, pettigrewMetaanalyticTestIntergroup2006,paluckContactHypothesisReevaluated2019,Rao2019, loweTypesContactField2020,cornoInteractionStereotypesPerformance2022, ghoshReligiousDivisionsProduction2022, bursztynImmigrantNextDoor2024, loweHasIntergroupContact2024}, I show that even communication between in-group members about an out-group can reduce discrimination. 

The most closely related studies are \citet{broockmanDurablyReducingTransphobia2016} and \citet{kallaReducingExclusionaryAttitudes2020, kallaWhichNarrativeStrategies2023}, which show that trained door-to-door and phone canvassers in the US can reduce prejudice through short conversations, even when the canvasser is not from the minority group. My study differs in three main ways. First, I focus on communication \textit{within an existing social network} -- among neighbors who know each other well, rather than canvassers talking to strangers. Understanding whether existing social networks can generate norm change raises new conceptual issues: network members may exert stronger influence on each other, but also face higher costs for challenging discriminatory norms or be too homogeneous to generate changes in behavior.
% existing social I thereby explore whether individuals already in a social network can effectively change norms despite potentially larger social costs of deviating from a discriminatory norm, or whether they are too similar to others in their network to change behavior. 
 Second, I examine \textit{undirected} communication, where participants have no explicit incentive or instruction to reduce discrimination, unlike trained canvassers with clear directives to decrease bias. The endogenous choice to speak up against discrimination appears to be important in my context and reveals how norms might shift organically without external intervention in other contexts. Third, I examine communication that includes making revealed-preference choices, with revealed-preference discrimination outcomes. This reduces the possibility of cheap talk both during the intervention and when measuring the outcome.

%First, I examine communication that is \textit{undirected}, where participants have no explicit incentive or instruction to reduce prejudice, while Broockman \& Kalla examine the effects of canvassers who were explicitly paid and trained to reduce prejudice. I show that the (endogenous) choice to speak up against discrimination, and the asymmetry in that choice, appears key for driving the results. Examining this choice to advocate is important for understanding how social change might occur without external intervention. Second, I examine communication \textit{within an existing social network}, whereas Broockman \& Kalla use canvassers talking to strangers. This provides insight into whether individuals already embedded within social networks can effectively change norms despite potentially larger social costs of deviating from a discriminatory norm, or whether they are typically too similar to others in their network to change behavior.



%The key differences with this work are that I examine communication that is \textit{undirected} and \textit{within-network}. While their canvassers were paid, trained, and directed to reduce prejudice among their targets, I examine communication between neighbors who are not directed and have no explicit incentive to reduce prejudice. 









%While their canvassers were explicitly paid and trained to reduce prejudice among strangers, I instead examine communication between neighbors with no explicit incentive to reduce prejudice. I show that pro-minority participants \textit{decide} to speak up against discrimination and successfully encourage others to discriminate less. Examining this endogenous choice to advocate is important for understanding how social change might occur without external intervention, and provides insight into whether individuals already embedded within social networks can effectively change norms despite potentially large social costs of deviating from a discriminatory norm. 

Other papers evaluate the effect of ingroup contact on outgroup prejudice.  \citet{scaccoCanSocialContact2018} find that ingroup contact backfires, and other papers typically find null results (see \citealp{loweHasIntergroupContact2024}). I contribute to this scarce literature by documenting a large reduction in discrimination that contrasts with previous studies, and by focusing on semi-structured communication about a minority among the ingroup, rather than unstructured ingroup contact that is often bundled with other treatments (e.g., skill training).

%\textcolor{red}{Say it's semi-structured conversation; within-network; ``undirected''}



%I also contribute by providing insight into \textit{why} horizontal communication reduces discrimination, showing suggestive evidence of norm-based persuasion as a key mechanism. 
%%
%%By identifying norm-based persuasion as the key mechanism, I provide insight into \textit{why} horizontal communication reduces discrimination in this context.
%%
%% can reduce discrimination across contexts. 
%%
%%I also contribute to our understanding of how the  effects of horizontal communication may generalize to other contexts by documenting a role for norm-based persuasion as a key mechanism. 
%I expand the literature on persuasion by examining the endogenous choice to persuade and the role of moral arguments in an economic context \citep{dellavignaPersuasionEmpiricalEvidence2010, benabouNarrativesImperativesMoral2020}. I use large language models to open the black box and show how moral discussion content relates to reductions in discrimination. I show suggestive evidence that the discussion generates a misperceived norm that reduces discrimination, in line with work on pluralistic ignorance and the ``spiral of silence'' that explores misperceptions about the prevalence of views and how those misperceptions can be affected by communication \citep{kuranPreferenceFalsificationPolicy1987,kuranNowOutNever1991, kuranPrivateTruthsPublic1997, bursztynMisperceivedSocialNorms2020, bursztynExtremeMainstreamErosion2020, matavelliWeDontTalk2023, huangBreakingSpiralSilence2023}. 
%%
%%
%%
%%\citep{} and building on the idea of pluralistic ignorance, in which there are misperceptions about the prevalence of discriminatory attitudes that can affect behavior . 
%%
%These findings suggest horizontal communication is likely to be helpful specifically in contexts where pro-minority views are widely accepted, even if discrimination is widespread. In such contexts, pro-minority communicators will be more willing to speak up, more able to credibly signal an anti-discriminatory norm, and more able to use persuasive moral arguments.
%
%
%\textcolor{red}{It would be nice to see some additional discussion of why this remaining difference is a meaningful one and in what ways the policy (or other) implications of these findings vary from that previous work.}
%
%\textcolor{red}{E.g., relative to Bursztyn et al, discussions can CREATE a misperception? So you can get social change even in the absence of an initial misperception???}

%I also contribute by providing insight into \textit{why} horizontal communication reduces discrimination, showing suggestive evidence of norm-based persuasion as a key mechanism.
%The discussion generates a misperceived norm that reduces discrimination, in line with work on pluralistic ignorance and the ``spiral of silence'' that explores misperceptions about the prevalence of views and how those misperceptions can be affected by communication \citep{kuranPreferenceFalsificationPolicy1987,kuranNowOutNever1991, kuranPrivateTruthsPublic1997, bursztynMisperceivedSocialNorms2020, bursztynExtremeMainstreamErosion2020, matavelliWeDontTalk2023, huangBreakingSpiralSilence2023}. My work differs in two main ways from previous papers showing that providing information and correcting misperceived norms can affect behavior. First, I examine the effect of communication \textit{between} participants rather than top-down statistics about norms delivered by the experimenter. My results imply that more naturalistic forms of communication, without an external intervention or information aggregator, has the possibility of rapidly shifting behavior. I also expand the literature on persuasion by examining the endogenous choice to persuade and using large language models to explore the role of moral arguments in such communication \citep{dellavignaPersuasionEmpiricalEvidence2010, benabouNarrativesImperativesMoral2020}. Second, while other work implies that change will only occur in the presence of pluralistic ignorance (a misperception about prevailing norms), my results instead suggest that the main precondition is a wedge between what people do and what they believe is right. In such contexts, horizontal communication is likely to be especially helpful because pro-minority communicators will be more willing to speak up, more able to credibly signal an anti-discriminatory norm, and more able to use persuasive moral arguments.


The discussion generates a misperceived norm that reduces discrimination, in line with work on pluralistic ignorance (misperceptions about norms) and how they are affected by communication \citep{kuranPreferenceFalsificationPolicy1987,kuranNowOutNever1991, kuranPrivateTruthsPublic1997, bursztynMisperceivedSocialNorms2020, bursztynExtremeMainstreamErosion2020, matavelliWeDontTalk2023, huangBreakingSpiralSilence2023}. My work differs from previous papers in three ways. First, participants generate the norm shift themselves through horizontal communication, rather than in reaction to statistics about norms delivered top-down by the experimenter. My results imply that more naturalistic communication could rapidly shift behavior, even without external intervention. Second, I expand the literature on persuasion by documenting the importance of moral arguments and the endogenous choice to persuade \citep{dellavignaPersuasionEmpiricalEvidence2010, benabouNarrativesImperativesMoral2020}, highlighting the potential role of moral ``activists'' in driving social change. Third, while other work implies that social change occurs only in the presence of pluralistic ignorance, my results suggest a different precondition: a wedge between what people do and what they believe is right. If people discriminate even when they think it is wrong, horizontal communication is likely to be especially helpful because pro-minority communicators will be more willing to speak up, more able to credibly signal an anti-discriminatory norm, and more able to use persuasive moral arguments.


% since this makes pro-minority communicators more willing to speak up, more able to credibly signal an anti-discriminatory norm, as this enables both pro-minority advocacy and receptiveness to moral arguments.



% mechanisms help explain why communication reduces discrimination in this context but may not in settings where pro-minority views are not already socially acceptable, since this would limit both the willingness of pro-minority individuals to speak up and the effectiveness of moral persuasion.

%\textcolor{red}{ADD MATAVELLI JMP citation }

Finally, I examine potential policy levers for reducing discrimination against LGBTQ+ persons in a lower- or middle-income country (LMIC). Even though such discrimination may have significant costs \citep{badgett2014economic, badgettRelationshipLGBTInclusion2019}, very little research in LMICs has examined its effects and causes \citep{badgettLGBTQEconomics2021}.\footnote{There are notable exceptions. \citet{lyonValueSimilarityNorm2023} shows that explaining to Ugandan citizens that homosexuality is legal in other countries leads to a backlash effect, worsening participants' opinions of those countries. \citet{gulesciTelenovelasAttitudesLGBTIQ2023} show that Latin American soap operas with LGBTQ+ characters also generate backlash, and \citet{abbateDiscriminationGayTransgender2023} show evidence of housing discrimination against some LGBTQ+ persons. Research on anti-LGBTQ+ discrimination in Europe and the US has examined its magnitude and nature \citep{tilcsikPridePrejudiceEmployment2011,carpenterTransgenderStatusGender2020,floresAttitudesTransgenderRights2015a,drydakisSexualOrientationEarnings2022,klawitterMetaAnalysisEffectsSexual2015,burnRelationshipPrejudiceWage2020, granbergHiringDiscriminationTransgender2020,buttonGenderIdentityRace2020, badgettReviewEconomicsSexualforthcoming}, along with strategies for reducing it \citep{sansonePinkWorkSameSex2018,aksoyLawsShapeAttitudes2020, tankardEffectSupremeCourt2017,ofosuSamesexMarriageLegalization2019, aksoyReducingSexualOrientationDiscrimination2021}.} I contribute by evaluating the potential to reduce this discrimination using horizontal communication.


%Finally, I examine potential policy levers for reducing discrimination against LGBTQ+ persons in a lower- or middle-income country (LMIC). Even though such discrimination may have significant costs \citep{badgett2014economic, badgettRelationshipLGBTInclusion2019}, very little research in LMICs has examined its effects and causes (\citealp{badgettLGBTQEconomics2021}).\footnote{There are notable exceptions. \citet{lyonValueSimilarityNorm2023} shows that explaining to Ugandan citizens that homosexuality is legal in other countries leads to a backlash effect, worsening participants' opinions of those countries.  In Latin America, \citet{gulesciTelenovelasAttitudesLGBTIQ2023} show that soap operas with LGBTQ+ characters also generate backlash, and \citet{abbateDiscriminationGayTransgender2023} show evidence of housing discrimination for some LGBTQ+ persons. Research on anti-LGBTQ+ discrimination in Europe and the US has examined its magnitude and nature \citep{tilcsikPridePrejudiceEmployment2011,carpenterTransgenderStatusGender2020,floresAttitudesTransgenderRights2015a,drydakisSexualOrientationEarnings2022,klawitterMetaAnalysisEffectsSexual2015,burnRelationshipPrejudiceWage2020, granbergHiringDiscriminationTransgender2020,buttonGenderIdentityRace2020, granbergHiringDiscriminationTransgender2020, badgettReviewEconomicsSexualforthcoming}, along with strategies for reducing it \citep{sansonePinkWorkSameSex2018,aksoyLawsShapeAttitudes2020, tankardEffectSupremeCourt2017,ofosuSamesexMarriageLegalization2019, aksoyReducingSexualOrientationDiscrimination2021}.} I contribute by examining the relative merits of reducing discrimination using horizontal and top-down communication.







%\textcolor{red}{CONTRIBUTIONS - OLD}

%This study makes four contributions. The key contribution is to show that discrimination can be rapidly reduced by generating horizontal communication about a minority, even in the absence of any additional information being injected into the group. While many previous studies have shown that social contact between in-groups and out-groups can affect discrimination \citep{allportNaturePrejudice1954,pettigrewIntergroupContactTheory1998,boisjolyEmpathyAntipathyImpact2006, pettigrewMetaanalyticTestIntergroup2006,paluckContactHypothesisReevaluated2019,Rao2019, loweTypesContactField2020,cornoInteractionStereotypesPerformance2022, ghoshReligiousDivisionsProduction2022, bursztynImmigrantNextDoor2024}, I show that even communication \textit{within} the in-group can reduce discrimination. I find large effects on discrimination, contrasting with standard economic theories that attribute discrimination to deep-seated preference parameters that are hard to change \citep{beckerEconomicsDiscrimination1957, bertrandFieldExperimentsDiscrimination2017}. %or to beliefs about minorities that only change when someone receives new information \citep{phelpsStatisticalTheoryRacism1972,arrowHigherEducationFilter1973,aignerStatisticalTheoriesDiscrimination1977}. 
%My results also contrast with work in social psychology showing that horizontal communication tends to reinforce existing views and exacerbate prejudice \citep{moscoviciGroupPolarizerAttitudes1969, myersDiscussionEffectsRacial1970, myersGroupPolarizationPhenomenon1976, smithPowerTalkDeveloping2011, heuserEffectsInPersonConversations2022}.\footnote{I also speak to a broader literature on how communication affects economic behaviors, e.g., \citealp{dellavignaPersuasionEmpiricalEvidence2010, kamenicaBayesianPersuasion2011, Ashraf2014, Ashraf2020,  shillerNarrativeEconomics2017, Baland2017, levyEchoChambersTheir2019, benabouNarrativesImperativesMoral2020, bursztynJustifyingDissent2023}.} 











%\textcolor{red}{Other work has also evaluated interventions where discussions are not peer-to-peer, but are driven by facilitators and  designed to reduce discrimination \citep{bezrukovaMetaanalyticalIntegration402016,broockmanDurablyReducingTransphobia2016,kallaReducingExclusionaryAttitudes2020}. By contrast, my study shows that the horizontal communication that endogenously arises within a group can reduce discrimination. }





%\textcolor{red}{CITE SOCIAL PSYCH STUDIES IN THIS PARA}


%\textcolor{red}{Cite horizontal communication papers somewhere - e.g., spiral of silence}

%Given the surprising shift away from discrimination driven by communication, I contribute by providing extensive evidence on potential \textit{mechanisms} underlying endogenous social change.\footnote{Other literature also examines ways of exogenously changing social norms in order to reduce discrimination \citep{broockmanDurablyReducingTransphobia2016,kallaReducingExclusionaryAttitudes2020, dharReshapingAdolescentsGender2022a,jayachandranSocialNormsBarrier2021, gomezDepersonalizedExtendedContact2018, beamanPowerfulWomenDoes2009, LaFerrara2012, Banerjee2019, andrewGenderNormsViolence2022}.
%} Existing literature has focused on ``pluralistic ignorance'', in which there are misperceptions about the prevalence of discriminatory attitudes, suggesting that horizontal communication could change behavior by correcting these misperceptions \citep{kuranPreferenceFalsificationPolicy1987,kuranNowOutNever1991, kuranPrivateTruthsPublic1997, bursztynMisperceivedSocialNorms2020, bursztynExtremeMainstreamErosion2020}. I show that even without such large misperceptions, social change can instead occur when people mutually \textit{persuade} each other to change their attitudes. This is consistent with a model in which ``activists'' or ``positive deviants'' (those with strong pro-social preferences) choose to be more vocal and persuade others. I thereby expand on a literature on how people adapt their communication in group settings, thus promoting the spread of certain narratives \citep{braghieriPoliticalCorrectnessSocial2021,morrisPoliticalCorrectness2001, huangBreakingSpiralSilence2023, golmanAcceptableDiscourseSocial2022, crandallSocialNormsExpression2002, benabouNarrativesImperativesMoral2020, bursztynJustifyingDissent2023}.







%An 




 

%An important strand of literature has focused on cases of ``pluralistic ignorance'', in which there are misperceptions about the prevalence of discriminatory attitudes, suggesting that horizontal communication could correct these misperceptions and thereby change behavior \citep{kuranPreferenceFalsificationPolicy1987,kuranNowOutNever1991, kuranPrivateTruthsPublic1997, bursztynMisperceivedSocialNorms2020, bursztynExtremeMainstreamErosion2020}.\footnote{Other literature also examines ways of exogenously changing social norms in order to reduce discrimination \citep{dharReshapingAdolescentsGender2022a,jayachandranSocialNormsBarrier2021, gomezDepersonalizedExtendedContact2018, beamanPowerfulWomenDoes2009, LaFerrara2012, Banerjee2019, andrewGenderNormsViolence2022}.
%} In contrast, I show that even in the absence of such large misperceptions, social change can be generated when people mutually persuade each other to change their attitudes, consistent with a model in which individuals self-select into being more vocal.
%I link to prior work on how people adapt their \textit{communication} to conform to social norms, thereby promoting the spread of certain narratives \citep{braghieriPoliticalCorrectnessSocial2021,morrisPoliticalCorrectness2001, golmanAcceptableDiscourseSocial2022, crandallSocialNormsExpression2002, benabouNarrativesImperativesMoral2020, bursztynJustifyingDissent2023}. I show that such communication can be persuasive and therefore generate equilibrium changes in private behavior.


%thereby contributing to a literature on social norms and social change \citep{kuranPreferenceFalsificationPolicy1987, fernandezCulturalChangeLearning2013, sunsteinHowChangeHappens2019, gulesciSteppingStoneApproach2023, andreoniPredictingSocialTipping2021}.

%: in following an anti-discriminatory norm, people may persuade each other to discriminate less even in subsequent private choices.


%\footnote{My work thus provides a micro-foundation for theoretical models of social change in which social norms can generate longer-run changes in private attitudes (see, e.g., \citealt{kuranPreferenceFalsificationPolicy1987}). I also focus on what types of communication about moral behavior can change behavior. In line with my results, while top-down moral suasion effects are often small or transitory \citep{dalboRightThingEffects2014a, haalandDesigningInformationProvision2020}, interventions that use inter-personal conversations can generate longer-run reductions in prejudice \citep{broockmanDurablyReducingTransphobia2016,kallaReducingExclusionaryAttitudes2020}.}




%The focus on the effects of intra-group dynamics links to research examining how social norms can shape discriminatory behavior \citep{bursztynExtremeMainstreamErosion2020,bursztynMisperceivedSocialNorms2020,alesinaOriginsGenderRoles2013,crandallSocialNormsExpression2002, mungerTweetmentEffectsTweeted2017, siegelNo2SectarianismExperimentalApproaches2020, jayachandranSocialNormsBarrier2021}, and how such social norms might be changed to reduce discrimination .









%Second, this work contributes to the literature on how group settings affect decision-making. Social psychology has emphasised that groups aggregate individuals' preferences, leading to convergence \textit{within} a group, but more polarisation \textit{between} groups \citep{stonerComparisonIndividualGroup1961,myersGroupPolarizationPhenomenon1976a, schroederRiskyShiftGeneral1973, sunstein2009going, heuserEffectsInPersonConversations2022}.\footnote{\citet{myersDiscussionEffectsRacial1970}, for example, show that group discussions make high-prejudice groups more discriminatory, and low-prejudice groups less discriminatory. Several theoretical models explore whether groups simply aggregate pre-existing preferences, or whether exchanging information and opinions can transform attitudes and norms \citep{davisGroupDecisionSocial1973, kerrModelTestingModel1979, stasserGroupDecisionMaking1981,kerrSocialTransitionSchemes1981,ambrusHowIndividualPreferences2015}. A separate strand of the literature also shows that intra-group communication can facilitate coordination, allowing groups to reach desirable equilibria, for example by coordinating in public goods games \citep{bicchieriCovenantsSwordsGroup2002, cameraCommunicationCommitmentDeception2011}.} In contrast, I show that group settings can lead to large average changes in behavior, suggesting that they do not simply average out existing preferences. I suggest that social norms underly some of this aggregate effect, linking to a large literature on how social norms can shape discriminatory behavior.\footnote{See e.g., \citet{benabouIncentivesProsocialBehavior2006, bursztynExtremeMainstreamErosion2020,bursztynMisperceivedSocialNorms2020,alesinaOriginsGenderRoles2013, mungerTweetmentEffectsTweeted2017, siegelNo2SectarianismExperimentalApproaches2020, jayachandranSocialNormsBarrier2021}. 
A related series of papers also examines how to change these social norms to reduce discrimination \citep{dharReshapingAdolescentsGender2022,gulesciSteppingStoneApproach2021,jayachandranSocialNormsBarrier2020, gomezDepersonalizedExtendedContact2018, beamanPowerfulWomenDoes2009a, gulesciSteppingStoneApproach2021, LaFerrara2012, Banerjee2019, sechristInfluenceSocialConsensus2011, andreoniPredictingSocialTipping2021, andrewGenderNormsViolence2022}.} 


%A second strand examines whether narratives can directly change beliefs or attitudes, particularly in the context of moral behavior \citep{graeberStoriesStatisticsMemory2022, benabouNarrativesImperativesMoral2020, eliazModelCompetingNarratives2020, alesinaImmigrationRedistribution2023, bursztynOpinionsFacts2022}. In line with my results, while top-down moral suasion effects are often small or transitory \citep{dalboRightThingEffects2014a, haalandDesigningInformationProvision2020}, interventions that use inter-personal conversations can generate longer-run reductions in prejudice \citep{broockmanDurablyReducingTransphobia2016,kallaReducingExclusionaryAttitudes2020}.



%I link to a growing body of work that explores the importance of narratives --- frames for understanding the world and communicating with others --- on economic behavior \citep{shillerNarrativeEconomics2017, benabouNarrativesImperativesMoral2020, bursztynJustifyingDissent2023,andreNarrativesMacroeconomy2022}. One strand of prior work has focused on people adapt their communication to conform to social norms, thereby biasing the spread of certain narratives \citep{braghieriPoliticalCorrectnessSocial2021,morrisPoliticalCorrectness2001, golmanAcceptableDiscourseSocial2022, crandallSocialNormsExpression2002a}. I show that such bias can generate equilibrium changes in attitudes: in following an anti-discriminatory norm, people persuade each other to discriminate less. My work thus provides a micro-foundation for theoretical models of social change in which social norms can generate longer-run changes in private attitudes (see, e.g., \citealt{kuranPreferenceFalsificationPolicy1987}). 

%
%Third, I show that raising awareness of minority rights can reduce discrimination. I thus provide empirical evidence for the \textit{expressive law hypothesis}, which postulates that changes in the law affect people's behavior by changing their perception of the prevailing social norm \citep{mcadamsAttitudinalTheoryExpressive2001, mcadamsFocalPointTheory2000, mcadamsNormsLawEconomics2004, benabouLawsNorms2011, sunsteinExpressiveFunctionLaw1996}. 
%%In line with this, I show that changing people's beliefs about the legal rights of a minority can reduce discrimination, and that this is partly mediated by changes in beliefs about norms. 
%I build on a recent empirical literature that examines this hypothesis \citep{laneLawNormsEmpirical2019, funkThereExpressiveFunction2007, aksoyLawsShapeAttitudes2020, chenConstructionMorals2014, tankardEffectSupremeCourt2017, galbiatiHowLawsAffect2020, ofosuSamesexMarriageLegalization2019, wheatonLawsBeliefsBacklash2020}, and contribute by assessing whether communicating about the law can still affect behavior in a lower-middle income setting, where state capacity and trust in the legal system are lower. %I also show that horizontal communication between individuals is more effective at reducing discrimination than top-down communication about the law.
%

%In addition, I show that raising awareness of legal rights may be more effective than simply advocating for those rights without institutional backing. 

% In my study, where citizens' knowledge of the law is initially poor, I can cleanly identify causal effects by truthfully altering participants' beliefs about the law. I also contribute by assessing whether the law can still affect behavior outside of a high-income setting, where state capacity and trust in the legal system are lower. {\color{red}EDIT PARAGRAPH TO COMPARR RIGHTS VS DISCUSSION A BIT MORE???}
% 
% Finally, I examine potential policy levers for reducing discrimination against LGBTQ+ persons in a lower- or middle-income country (LMIC). Even though such discrimination may have significant costs \citep{badgett2014economic, badgettRelationshipLGBTInclusion2019}, very little research in LMICs has examined its effects and causes (\citealp{badgettLGBTQEconomics2021}).\footnote{There are notable exceptions. \citet{lyonValueSimilarityNorm2023} shows that explaining to Ugandan citizens that homosexuality is legal in other countries leads to a backlash effect, worsening participants' opinions of those countries.  In Latin America, \citet{gulesciTelenovelasAttitudesLGBTIQ2023} show that soap operas with LGBTQ+ characters also generate backlash, and \citet{abbateDiscriminationGayTransgender2023} show evidence of housing discrimination for some LGBTQ+ persons. Research on anti-LGBTQ+ discrimination in Europe and the US has examined its magnitude and nature \citep{tilcsikPridePrejudiceEmployment2011,carpenterTransgenderStatusGender2020,floresAttitudesTransgenderRights2015a,drydakisSexualOrientationEarnings2022,klawitterMetaAnalysisEffectsSexual2015,burnRelationshipPrejudiceWage2020, granbergHiringDiscriminationTransgender2020,buttonGenderIdentityRace2020, granbergHiringDiscriminationTransgender2020, badgettReviewEconomicsSexualforthcoming}, along with strategies for reducing it \citep{sansonePinkWorkSameSex2018,aksoyLawsShapeAttitudes2020, tankardEffectSupremeCourt2017,ofosuSamesexMarriageLegalization2019, aksoyReducingSexualOrientationDiscrimination2021}.} I contribute by examining the relative merits of reducing discrimination using horizontal and top-down communication.
% 
% 

 
 
 
 



%Under this polarisation model, groups simply aggregate the preferences of the constituent participants, implying that the mean behavior of groups should not differ from individuals.%
%\footnote{Several theoretical models explore whether groups simply aggregate pre-existing preferences, or whether exchanging information and opinions can transform attitudes and norms \citep{davisGroupDecisionSocial1973, kerrModelTestingModel1979, stasserGroupDecisionMaking1981,kerrSocialTransitionSchemes1981,ambrusHowIndividualPreferences2015}. A separate strand of the literature also shows that intra-group communication can facilitate coordination, allowing groups to reach desirable equilibria, for example by coordinating in public goods games \citep{bicchieriCovenantsSwordsGroup2002, cameraCommunicationCommitmentDeception2011}.} I contribute by exploring a context in which group discussions lead to a large average reduction in post-group discrimination, suggesting that the group cannot simply be aggregating pre-existing preferences.



%
%\textbf{-----------------------}
%
%I contribute to the literature on discrimination \citep{paluckReducingIntergroupPrejudice2009,bertrandFieldExperimentsDiscrimination2017,paluckPrejudiceReductionProgress2021}, which has documented the potential for large efficiency and equity costs of discrimination in wide range of economic domains, including the labor market \citep{hjortEthnicDivisionsProduction2014, gloverDiscriminationSelfFulfillingProphecy2017, ashrafGenderRolesMisallocation2022}, health care decisions \citep{alsanDoesDiversityMatter2019}, credit markets \citep{fismanCulturalProximityLoan2017}, politics \citep{delucaEthnicFavoritismAxiom2018}, and informal social interactions \citep{loweTypesContactField2020}.
%
%
%
%
% Standard theories of discrimination in economics attribute it to a deep-seated preference parameter \citep{beckerEconomicsDiscrimination1957} or to beliefs that change only when someone receives new information \citep{phelpsStatisticalTheoryRacism1972,arrowHigherEducationFilter1973,aignerStatisticalTheoriesDiscrimination1977}. These theories cannot easily account for long-term equilibrium changes in the prevalence of discrimination in society. 
% 
% My study shows that discrimination may be significantly more malleable to social context than these models would suggest, and provides a foundation for changes in societal prejudice. 
%
%In focusing on the effects of inter-personal communication on discrimination, I relate to the literature on social contact theory \citep{allportNaturePrejudice1954,pettigrewIntergroupContactTheory1998,paluckContactHypothesisReevaluated2019,loweTypesContactField2020,Rao2019}. This literature shows that social contact between in-groups and out-groups can reduce discrimination. Conversely, I show that even social contact \textit{within} the in-group can facilitate reductions in discrimination under the right circumstances.  Other work has also evaluated interventions where inter-personal communication is specifically designed to reduce discrimination \citep{broockmanDurablyReducingTransphobia2016,kallaReducingExclusionaryAttitudes2020}. By contrast, my study shows that the communication that endogenously arises within a group, without being directed in any particular way by the intervention, can encourage pro-minority choices.
%
%This work contributes to the literature on how group settings affect decision-making. Work in social psychology has emphasised that people's attitudes tend to converge within a group. At the same time, this convergence means that groups typically become more polarised and exhibit more extreme behavior than individuals \citep{stonerComparisonIndividualGroup1961,myersGroupPolarizationPhenomenon1976a, schroederRiskyShiftGeneral1973}. \citet{myersDiscussionEffectsRacial1970}, for example, show that group discussions make high-prejudice groups more discriminatory, and low-prejudice groups less discriminatory. Under this polarisation model, groups simply aggregate the preferences of the constituent participants, implying that the mean behavior of groups should not differ from individuals.%
%\footnote{Several theoretical models explore whether groups simply aggregate pre-existing preferences, or whether exchanging information and opinions can transform attitudes and norms \citep{davisGroupDecisionSocial1973, kerrModelTestingModel1979, stasserGroupDecisionMaking1981,kerrSocialTransitionSchemes1981,ambrusHowIndividualPreferences2015}. A separate strand of the literature also shows that intra-group communication can facilitate coordination, allowing groups to reach desirable equilibria, for example by coordinating in public goods games \citep{bicchieriCovenantsSwordsGroup2002, cameraCommunicationCommitmentDeception2011}.} I contribute by exploring a context in which group discussions lead to a large average reduction in post-group discrimination, suggesting that the group cannot simply be aggregating pre-existing preferences.
%% By contrast, I present a context in which taking part in a group discussion leads to a large average reduction in discrimination in post-group choices, suggesting that the group cannot simply be averaging pre-existing preferences. 
%
%
%The focus on the effects of intra-group dynamics links to research examining how social norms can shape discriminatory behavior \citep{bursztynExtremeMainstreamErosion2020,bursztynMisperceivedSocialNorms2020,alesinaOriginsGenderRoles2013,crandallSocialNormsExpression2002, mungerTweetmentEffectsTweeted2017, siegelNo2SectarianismExperimentalApproaches2020, jayachandranSocialNormsBarrier2021}, and how such social norms might be changed to reduce discrimination \citep{dharReshapingAdolescentsGender2022,gulesciSteppingStoneApproach2021,jayachandranSocialNormsBarrier2020, gomezDepersonalizedExtendedContact2018, beamanPowerfulWomenDoes2009a, gulesciSteppingStoneApproach2021, LaFerrara2012, Banerjee2019, sechristInfluenceSocialConsensus2011, andreoniPredictingSocialTipping2021, crandallSocialNormsExpression2002a, andrewGenderNormsViolence2022}. 
%
%I examine whether the law can be used to change a discriminatory social norm. A large theoretical literature on the \textit{expressive law hypothesis} has postulated that changes in the law may affect people's behavior by changing their perception of the prevailing social norm \citep{mcadamsAttitudinalTheoryExpressive2001, mcadamsFocalPointTheory2000, mcadamsNormsLawEconomics2004, benabouLawsNorms2011, sunsteinExpressiveFunctionLaw1996}. I show that changing people's beliefs about the legal rights of a minority can reduce discrimination, and may be more effective than simply advocating for rights without institutional backing. This complements a newer empirical literature on how changes in laws affect attitudes and norms \citep{laneLawNormsEmpirical2019, funkThereExpressiveFunction2007, aksoyLawsShapeAttitudes2020, chenConstructionMorals2014, tankardEffectSupremeCourt2017, galbiatiHowLawsAffect2020, ofosuSamesexMarriageLegalization2019, wheatonLawsBeliefsBacklash2020}. My context, where citizen's knowledge of the law is initially poor, provides
% clean identification by allowing us to truthfully manipulate participants' beliefs about the law. I also contribute by evaluating the whether the law is still able to affect behavior outside of a high-income setting, where state capacity and trust in the legal system is lower.
%
%
%
%
%
%I link to a growing body of work that explores the importance of narratives --- frames for understanding the world and communicating with others --- on economic behavior \citep{shillerNarrativeEconomics2017, benabouNarrativesImperativesMoral2020, bursztynJustifyingDissent2023,andreNarrativesMacroeconomy2022}. One strand of prior work has focused on people adapt their communication to conform to social norms, thereby biasing the spread of certain narratives \citep{braghieriPoliticalCorrectnessSocial2021,morrisPoliticalCorrectness2001, golmanAcceptableDiscourseSocial2022, crandallSocialNormsExpression2002a}. I show that such bias can generate equilibrium changes in attitudes: in following an anti-discriminatory norm, people persuade each other to discriminate less. My work thus provides a micro-foundation for theoretical models of social change in which social norms can generate longer-run changes in private attitudes (see, e.g., \citealt{kuranPreferenceFalsificationPolicy1987}). 
%
%A second strand examines whether narratives can directly change beliefs or attitudes, particularly in the context of moral behavior \citep{graeberStoriesStatisticsMemory2022, benabouNarrativesImperativesMoral2020, eliazModelCompetingNarratives2020, alesinaImmigrationRedistribution2023, bursztynOpinionsFacts2022}. In line with my results, while top-down moral suasion effects are often small or transitory \citep{dalboRightThingEffects2014a, haalandDesigningInformationProvision2020}, interventions that use inter-personal conversations can generate longer-run reductions in prejudice \citep{broockmanDurablyReducingTransphobia2016,kallaReducingExclusionaryAttitudes2020}.
%
%
%My focus on whether people's behavior in groups is driven by their concerns about social image also links to an expanding literature that shows that such concerns have large impacts on important economic behaviors \citep{benabouIncentivesProsocialBehavior2006, dellavignaVotingTellOthers2017, bursztynSocialImageEconomic2017, bursztynHowDoesPeer2015, bursztynExtremeMainstreamErosion2020, bursztynJustifyingDissent2023, karingSocialSignalingChildhood2021, bursztynStatusGoodsExperimental2018, bursztynActingWifeMarriage2017}. I contribute by exploring how these concerns play out in free-flowing group scenarios, and how they affect real-stakes hiring decisions.
%
%Finally, I present a study that examines policy levers for reducing economic discrimination against LGBTQ+ groups in a lower- or middle-income country (LMIC). The exclusion of LGBTQ+ groups may have significant macro-level economic effects \citet{badgett2014economic, badgettRelationshipLGBTInclusion2019}.
%And even though these groups are highly vulnerable to discrimination, harassment, and violence, they have so far been the focus of almost no research (as identified by a review of the recent literature, see \citealp{badgettLGBTQEconomics2021}). There are notable exceptions in Latin America.  \citet{gulesciTelenovelasAttitudesLGBTIQ2023} shows that soap operas in Latin America with LGBTQ+ characters generate backlash, leading to more anti-LGBTQ+ attitudes. And  \citet{abbateDiscriminationGayTransgender2023} examine discrimination in the housing market against LGBTQ+ individuals, showing that couples involving a transgender woman receive markedly fewer callbacks in their correspondence methodology, while gay male couples do not appear to face similar discrimination. Other research on LGBTQ+ groups in Europe and the US has sought to understand the magnitude and nature of the discrimination they face \citep{tilcsikPridePrejudiceEmployment2011,carpenterTransgenderStatusGender2020,floresAttitudesTransgenderRights2015a,drydakisSexualOrientationEarnings2022,klawitterMetaAnalysisEffectsSexual2015,burnRelationshipPrejudiceWage2020, buttonGenderIdentityRace2020}, along with whether discrimination can be reduced by changes in the law \citep{sansonePinkWorkSameSex2018,aksoyLawsShapeAttitudes2020, tankardEffectSupremeCourt2017,ofosuSamesexMarriageLegalization2019} or information interventions \citep{aksoyReducingSexualOrientationDiscrimination2021}. Most relevant to the current study,  \citet{granbergHiringDiscriminationTransgender2020} show some evidence of hiring discrimination against transgender individuals in an audit study in Sweden. I contribute to this literature by focusing on an LMIC, and examining a novel method of reducing discrimination through group discussions. 
%
%
%







	
	
%	
%	Focusing on a highly visible and highly stigmatized 
%	
%	
%	The social norms that underpin societal prejudice are typically highly persistent \citep{alesinaOriginsGenderRoles2013, acharyaDeepRootsHow2018,  becker2022origins}. But legal processes, particularly granting rights to minority groups, may have the power to rapidly transform these norms by shifting people’s perceptions of what behavior is socially acceptable \citep{sunsteinExpressiveFunctionLaw1996, mcadamsFocalPointTheory2000, mcadamsAttitudinalTheoryExpressive2001, mcadamsNormsLawEconomics2004, benabouLawsNorms2011, laneLawNormsEmpirical2019}. In this project, I will test whether the expansion of the legal rights of a highly stigmatized minority in India, transgender people, can reduce anti-trans discrimination by altering social norms. The visibility of the transgender community in India means that such anti-trans discrimination is widespread, particularly in the areas of employment, education, and housing, as well as in instances of violence and blackmail \citep{badgett2014economic, chakrapaniStructuralViolenceKothiidentified2007, ukgovernmenthomeofficeCountryPolicyInformation2021, unesconewdelhiExperiencesBullyingSchools2018, u.s.governmentstatedept.USGovernmentState2019}. While there have been legal advances—in 2014, the Supreme Court of India ruled that transgender people possess the same fundamental rights as cisgender individuals—awareness of these legal advances remains low. 
%	
%	For my job market paper, I will therefore run a randomized control trial to assess whether educating individuals about transgender rights is an effective means of reducing discrimination in a real-world hiring choice. I will delve into the role that social dynamics play in determining anti-trans discrimination by examining whether this information encourages people to communicate more positively about trans individuals in group discussions, potentially creating a social multiplier on the effect of the information interventions. 
%	
%	In pilot results (N=120), the interventions lead to major reductions in discrimination. Delivering the main information treatment in a setting where participants are able to discuss their hiring choices increases the probability of hiring a transgender person from 32\% to 88\% (see Section \ref{sec_results}). This effect appears to be driven by changes in private attitudes (as measured by an implicit association test) and by changes in the way people communicate about transgender people in discussions.
	
%	The data collection for the full sample (N=2280) will take place over March and April 2023.

\section{Context: Transgender community in India}

This study examines discrimination against a historically marginalized community in South Asia largely composed of transgender women, who in the state of Tamil Nadu are called \textit{thirunangai}.\footnote{Throughout the paper, for simplicity, I refer to people from this community as ``transgender people''.} This group has a longstanding cultural and religious role in Indian society \citep{reddyRespectSexNegotiating2005, kalraHijrasUniqueTransgender2012}. 
%They feature in Hindu scriptures and frequently give blessings at traditional weddings and funerals. 
Their visually recognizable identity, however, leaves them particularly susceptible to discrimination \citep{sharmaChangingLandscapeSexual2014, agoramoorthyLivingSocietalEdge2015}. Economic discrimination against this group is multi-faceted. Transgender people are often excluded from traditional forms of paid employment, pushing many into poverty and sex work \citep{masih2012ummeedlive, shivakumar2014markers, badgett2014economic, nuttbrockTransgenderSexWork2018}, resulting in low levels of formal employment \citep{national2017study}. There are likely at least 0.5--1.25 million transgender people in India  \citep{jaffrey1996invisibles, indian_census_2011, dixitThirdGenderThird2023}, implying that such discrimination results in large welfare and efficiency costs \citep{badgett2014economic}.


%an unfortunate reality entrenched at least since the British colonial administration categorised them as a "criminal tribe".
%
%The community spans the sub-continent. 

%\textcolor{red}{UPDATE NEW POP SIZE ESTIMATE, and urban population}

%While estimates vary widely due to the difficulty of enumerating the population, there are likely at least 0.5--1.25 million transgender people in India  \citep{jaffrey1996invisibles, indian_census_2011, dixitThirdGenderThird2023}. Anti-transgender discrimination is therefore likely to result in substantial welfare and efficiency costs \citep{badgett2014economic}.

 



%Discrimination can also take the more extreme forms of police harassment, physical violence, and sexual violence  
%
%
%
%This in turn leaves them vulnerable to other forms of stigma, and contributes to high rates of HIV  \citep{}. 
%
%Fear of harassment acts as a barrier to accessing both medical treatment and education \citep{}. 
%
%For example, a survey in Kerala suggested that around 60\% of transgender persons had dropped out of school due to severe harassment \citep{sangamaTransgenderSurveyKerala2015}. 
%
%
%
%Together, these issues contribute to widespread mental health problems \citep{kalraCulturalPsychiatricSexuality2013, malMolestationBengaliHijras2018, jayadevaUnderstandingMentalHealth2017} and high rates of suicide \citep{halliSuicidalityGenderMinorities2021}.

%\textcolor{red}{Reframe in terms of "social activism" in favour of trans??}

Widespread prejudice and discrimination appear to be increasingly at odds with social norms that penalize discriminators or promote pro-trans activism. Discrimination, though common, can generate social disapproval: in the study control group, discriminatory scenarios were rated as ``wrong'' \input{../../outputs/stats/prop_attitude_control_round.tex} of the time. %\footnote{These self-reported ratings may be vulnerable to social desirability bias. But such bias will lead people to report what they believe to be the socially appropriate answer, in line with the claim that social norms curtail the expression of prejudice.} 
  This aligns with other survey evidence that indicates widespread support for protecting transgender people from discrimination \citep{ipsosGlobalAttitudesTransgender2018}. The context is thus analogous to other settings where private prejudice is relatively common, but its expression may be inhibited by social sanctions for prejudiced behavior \citep{bursztynExtremeMainstreamErosion2020, bursztynJustifyingDissent2023}.

Legal changes may have contributed to the decreasing acceptability of discrimination. In 2014, the Supreme Court recognized all constitutional rights for transgender persons, along with their right to identify as a third gender, and encouraged government initiatives to combat anti-transgender stigma. 
%By institutionalizing transgender rights, these changes may reduce discrimination in social settings by signaling to the populace that discrimination is no longer socially acceptable. 
But awareness of these recent changes remains low. \input{../../outputs/stats/control_awareness.tex} of control participants either believe that trans individuals do not have any legal status in India, or cannot identify a single legal right they hold.%, allowing me to change participants' beliefs about the law and examine the effects on discrimination.

The study took place in Chennai, the largest city in the state of Tamil Nadu. The urban setting is advantageous because (i) approximately 80\% of \textit{thirunangai} live in urban areas \citep{subramanian2015mapping}, so urban residents are aware of and can visually recognize transgender people; and (ii) urban residents are familiar with online delivery services, allowing me to use delivery service market research for the study.\footnote{\input{../../outputs/stats/have_ordered_from_app.tex} of the sample say that they have previously ordered goods to be delivered to their home using an app, reflecting the popularity of meal delivery services such as Swiggy and Zomato. The market research framing is also not so unusual in this context: \input{../../outputs/stats/taken_part_market_research.tex} of the sample have previously taken part in a market research survey or received a free item as a promotion.}


%are more likely  to have recently seen or interacted with a transgender person. Awareness and visual recognition of transgender people is therefore high. Second, urban residents are familiar with online delivery services, allowing me to use delivery service market research as a plausible framing for the study.\footnote{\input{../../outputs/stats/have_ordered_from_app.tex} of the sample say that they have previously ordered goods to be delivered to their home using an app,
% reflecting the popularity of meal delivery services such as Swiggy and Zomato. The market research framing is also not so unusual in this context: \input{../../outputs/stats/taken_part_market_research.tex} of the sample have previously taken part in a market research survey or received a free item as a promotion.}



















%. Discrimination has recently become \textit{legally} unacceptable: in 2014, the Supreme Court recognized all constitutional rights for transgender people, and in 2019, the government passed a law prohibiting discrimination against them. 





%It also appears to be \textit{socially} unacceptable. 

















	
		\section{Experimental design}
	
		
		\subsection{Design overview}
		
			

		
		\input{../../outputs/stats/total_n.tex} participants in Chennai, India took part in the field experiment. The experiment measures the effect of horizontal communication (group discussions) on subsequent hiring discrimination against transgender workers.
		
		The main goals of the experimental design were: (i) measuring discrimination in choices with real stakes; (ii) generating horizontal communication \textit{between} participants about transgender people, without the purpose of the study being obvious; (iii) understanding the mechanisms driving the effects; and (iv) benchmarking effects against the effect of delivering information about transgender persons' legal rights.		
		
		All treatments and primary data collection occurred in one session that lasted approximately 1 hour. To allow for a group discussion, 3 enumerators recruited and interviewed 3 respondents (a ``group'') at the same time. 
		
%		
%		All treatments and the primary data collection took place in a single session that lasted approximately 1 hour. To allow for a group discussion, enumerators always recruited and then interviewed 3 respondents at the same time. I call these 3 respondents a ``group".
		
		
%		{\color{red}MAIN GOALS OF THE DESIGN:
%		\begin{itemize}
%  \item Realistic real world choices in setting with natural discussion. 
%  \item Horizontal communication without forcing participants to speak to each other...
%  \item Choices that reveal discrimination without making it too obvious what I was trying to study
%  \item Understand mechanisms - related to social norms and persuasion...
%  \item Discussions of people who know each other so it's natural and social image concerns play a role
%  \item What else??
%\end{itemize}
%Then add - how I fulfil this criteria
%		}
		
		
%		{\color{red}Horizontal vs top-down communication}


		

%To measure hiring discrimination, participants were offered a free grocery delivery to their home, and made a series of choices over the worker who would carry out the delivery and which items they would receive. 

To measure hiring discrimination, participants were offered a free grocery delivery to their home, and made 10 binary choices between different delivery workers and grocery bundles. One choice was randomly selected to be implemented: between 2 and 9 weeks later, the selected worker delivered the selected grocery bundle, and participants were asked follow-up questions.

%Each participant made 10 binary choices, one of which was randomly selected to be implemented. Between 2 and 9 weeks after the main session, the selected delivery option was carried out by the chosen worker, and participants were asked follow-up survey questions. 

The first four choices served as treatment variation while the final six measured outcomes. During the \textit{treatment round}, some participants discussed their preferences in groups, while others chose privately. Then during the \textit{outcome round}, all participants made their final six choices individually and privately. These private choices are the main outcome, allowing me to measure how discussions affected \textit{private} behavior after the discussion. 

%To measure the effects of a group discussion, the first 4 choices (the \textit{treatment round}) were used as a source of treatment variation. The remaining 6 choices (the \textit{outcome round}) were always made individually and in private. I use these later private choices as my main outcome, allowing me to examine the effect of the treatments on private individual choices. 
%{\color{red}why do I have these treatment arms??? The first set of treatments are identifying the discussion effects + possible confounds and mechanisms. The second set of treatment are identifying the effects of learning about legal rights. }

The main treatments examined how horizontal communication affects discrimination, and why. The main effect of interest compares (i) \textit{3-person discussion}, in which all 3 participants had a discussion about their preferred hiring options and made collective choices; and (ii) \textit{No discussion (private)}, a control condition in which all participants made private individual hiring choices even during the treatment round. Two further treatments explore the mechanisms: (iii) \textit{2-person discussion}, in which 2 participants had a discussion and made a collective choice, while the third participant silently listened; and (iv) \textit{No discussion (public)}, in which all participants made individual hiring choices that they knew would later be revealed to the other group members.



A secondary source of treatment variation tested the effects of top-down communication about legal rights. I cross-randomized a video shown to participants before they made any hiring choices. Participants either saw (i) a \textit{legal rights}  video containing information about the legal rights of transgender people, (ii) a \textit{rights messaging} video containing persuasive messaging in favor of transgender rights, or (iii) a \textit{control} video that did not mention transgender rights.



	\autoref{fig_main_design} shows a summary of the experimental design for the main session (\autoref{fig_main_design_detail} gives further detail). I describe the design further below, with additional details in Appendix \ref{sec_appendix_exp_design}.
	

	\begin{figure}[htbp]
		\centering
		\caption{Summary of experimental design}
		\includegraphics[width=0.8\linewidth]{../../outputs/figs/diagram_design_simple.pdf}
		
		\label{fig_main_design}
		\centering
		\begin{tablenotes}
			\footnotesize
			\item  \textit{Notes}: 	More detail is given in Appendix Figure \ref{fig_main_design_detail}.
		\end{tablenotes}

	\end{figure}

	 
 
	
	
\subsection{Sample and recruitment}
	
Participants were recruited from urban areas in Chennai between March and July 2023 through direct household canvassing and introductions from community leaders (see \autoref{fig_survey_locations} for survey locations). All participants were offered a free grocery delivery as an incentive to participate. Participants were aged 20--65 and could read Tamil, and the median per capita food expenditure (Rs. 2000 per month; 87.40 USD PPP) was similar to a representative 2012 sample of urban Tamil Nadu residents \citep{nationalsamplesurveyofficeindiaHCEMonthlyCapita2012}.

We framed the study as market research about grocery delivery preferences to minimize experimenter demand effects. Participants were truthfully told that we were trying to understand people's preferences for grocery delivery options. Only \input{../../outputs/stats/purpose_0_mean.tex} of participants guessed the study's focus on transgender workers after completing their main hiring choices. These participants do not drive the treatment effects (see Appendix \ref{sec_alt_mechanisms}).

To allow for the group discussion, 3 enumerators always recruited and interviewed 3 respondents at the same time (including for the control group). To avoid recruitment strategies that differed across treatments, enumerators were blind to treatment status before starting the survey.%\footnote{Participants willing to be recruited as a group may be more sociable or socially sensitive than the average urban resident of Chennai. However, this does not appear to moderate the treatment effects of the discussion: there is no heterogeneity by intragroup relations or an index of individual sociability (\autoref{tab_het_group}).} 



Groups consisted of neighbors or acquaintances from the same street or locality to facilitate natural interactions. Group members knew each other \input{../../outputs/stats/prop_neighbours_knew_each_other.tex} of the time, described each other as family or friends \input{../../outputs/stats/prop_neighbours_relations.tex} of the time, and as neighbors \input{../../outputs/stats/prop_neighbours_neighbour.tex} of the time. To avoid hierarchical relationships in which one group member dominated a discussion, we recruited either all-male or all-female groups without multiple members of the same household. The sample was predominantly female (\input{../../outputs/stats/prop_female.tex}). The study's focus on deliveries was more relevant for women, since they were more likely to be responsible for managing food expenditures and receiving deliveries (\input{../../outputs/stats/prop_receive_delivery_female.tex}) than men (\input{../../outputs/stats/prop_receive_delivery_male.tex}).


	
	
	\subsection{Hiring choices}
	

	
%To obfuscate the purpose of the study and reduce experimenter demand effects, the survey was framed as a market research survey, and participants were truthfully told that we were trying to understand people's preferences for grocery delivery options. After the main hiring choices, only \input{../../outputs/stats/purpose_0_mean.tex} of the sample had correctly guessed that the purpose of the experiment was related to transgender workers (and the treatment effects are not driven by these participants, see Section \ref{sec_alt_mechanisms}).


Participants made 10 binary choices between delivery options, with one randomly selected to be implemented. Each option showed the worker's photo and the grocery items offered (see Figure \ref{fig_example_choice}), sometimes inducing a trade-off between a preferred worker and preferred items.

%	All participants made a series of 10 binary choices over which delivery option they preferred, one of which was randomly selected to be implemented. Figure \ref{fig_example_choice} shows an example of one such binary choice. For each choice, participants saw two options. Each option always included a photo of the worker and the items on offer, in some cases inducing a trade-off between a preferred worker and preferred items.\footnote{To minimize noise generated by differences in photos, all photos were headshots with a neutral grey background in which the worker had a neutral expression.}
	

	
	
	

	

  \begin{figure}[tbp]
		\centering
		\caption{Example of one of the binary choices participants faced}
		\includegraphics[width=0.55\linewidth]{screenshot002}
		\label{fig_example_choice}
	\end{figure}
 
%-------------------------
 
%Each choice paired a cisgender male ``\textit{benchmark}'' worker with an ``\textit{alternative}'' worker who was cisgender male, cisgender female, or transgender.
	In each choice, there was a ``\textit{benchmark}'' option (cisgender male), and an ``\textit{alternative}'' option (either cisgender male, cisgender female, or transgender).\footnote{This reduces the number of gender combinations, thus increasing power on the male-to-trans comparison, although it does not allow a direct comparison of trans and female workers.} Throughout the paper, I measure anti-transgender discrimination as the reduction in probability of choosing the alternative when the alternative was transgender.
	
Four of the 20 workers were transgender. This proportion was chosen to ensure sufficient power without making the experiment's purpose too obvious. We randomized the alternative worker's position (left or right), the order of choices, and the specific photos within gender categories. We never showed the same worker twice to a participant.\footnote{Photos were selected from a pool of 20 males, 21 females, and 13 transgender people. Cisgender photos were of survey enumerators who carried out the main survey. Participants were never shown pictures of the enumerator team that interviewed them to avoid response bias. Transgender photos showed real enumerators we recruited who had agreed to carry out deliveries if selected, but did not carry out the main survey. Later mechanism outcomes (that did not require workers to deliver goods) used stock headshot photos.}
 
 Participants could visually identify the transgender workers in the photos. A validation study in late 2022 ($N$=114) showed that participants correctly identified transgender photos as being transgender \input{../../outputs/stats/prop_trans_recog.tex} of the time (Appendix Table \ref{tab_trans_recog_matrix}).
	
	I measure participants' willingness to pay to avoid certain workers by randomly varying the items offered in each choice, so that in 40\% of cases one worker offered more items than the other. Each worker offered either 1 item (masala spice mix), 2 items (masala and tea), or 3 items (masala, tea, and ghee). The randomization was balanced across genders. The clear ranking of the bundles made the trade-off between item value and worker characteristics transparent for participants. The value of the item bundles was substantial relative to participants' consumption, corresponding to 
	%they cost Rs. 68, Rs. 154, and Rs. 240 respectively, corresponding to 
	\input{../../outputs/stats/item_set_1_value}, \input{../../outputs/stats/item_set_2_value}, and \input{../../outputs/stats/item_set_3_value} of median daily per capita food expenditure. 
	
	
	
	
	
	
%	To evaluate how participants traded off material benefits with their preferences for workers, the number of items offered by each worker was randomly varied so that sometimes one worker offered more items than the other. This randomization was balanced across worker genders. Each option either offered 1 item (masala spice mix), 2 items (masala and tea), or 3 items (masala, tea, and ghee).
	
	
%	\footnote{} 
	
	
	
%	The clear ranking of the bundles made the tradeoff between item value and worker characteristics clear for participants. The value of the item bundles was substantial relative to participants' consumption, corresponding to 
	%they cost Rs. 68, Rs. 154, and Rs. 240 respectively, corresponding to 
%	\input{../../outputs/stats/item_set_1_value}, \input{../../outputs/stats/item_set_2_value}, and \input{../../outputs/stats/item_set_3_value} of median daily per capita food expenditure. 	
	
To ensure participants anticipated some social contact with the worker, participants learned that, during the delivery, they would have a 15-minute conversation with their chosen worker to discuss their satisfaction with the service. When selecting workers, participants were told to consider the worker's characteristics, the items they offered, and this 15-minute conversation.
	
	To test if discrimination was statistical (e.g., stemmed from beliefs about worker reliability), some choices included truthful extra signals of worker quality. These included a ``reliability score'' (the proportion of successful deliveries from timed training exercises), work experience, and languages spoken (see Appendix \ref{sec_appendix_exp_design}). These were sampled to be balanced across genders.	
	
	
%	In some choice-pairs, additional truthful signals of worker quality were shown to evaluate whether discrimination was \textit{statistical}, e.g., driven by beliefs about whether they would reliably complete a delivery. Some choice-pairs reported the true proportion of successful deliveries from a set of timed training exercises carried out by all workers (the ``reliability score"). 
	
	
	
%	This reliability score incorporated exogenous variation in the perceived quality of each worker.\footnote{Participants were told that this was the proportion of completed deliveries from a training exercise. Workers completed multiple training exercises with different time limits, and I randomly showed their score within one of three categories: their low score (5 or 6), their mid-value score (7 or 8), or their high score (9 or 10). (see \autoref{sec_ethics} for discussion of the ethical considerations).}  
%	\footnote{I discuss the ethical considerations behind this design in \autoref{sec_ethics}. The randomization was set so that within a pair, the score category was either the same (60\% of the time), different by one level (30\% of the time), or different by two levels (10\% of the time).}
%	 In addition, for some choice-pairs, I truthfully reported (i) whether workers had 0-4 years or 5 years or more of work experience, and (ii) whether the worker spoke both Tamil and English or just Tamil. I sampled photos so that these characteristics were balanced across each worker gender.%\footnote{For experience and language, I always sampled worker photos so that these characteristics were balanced across each worker gender, to avoid participants making inferences on the "quality" of a gender over the course of the experiment.}
	 
	
	
%	--------
	
\textbf{Implementing choices}. To ensure incentive-compatibility, enumerators used scratch cards to randomly select 1 of the participants' 10 choices to be implemented. The participant received the chosen items from the chosen worker 2--9 weeks later, and the worker carried out a 15-minute follow-up survey at the same time. To minimize risk to transgender workers, the randomization selected choices involving transgender workers in fewer than 1\% of cases.\footnote{This design protected transgender workers from potential abuse or violence while maintaining incentive-compatibility: participants could receive a delivery from any chosen worker, but the probability was lower for transgender workers. When transgender workers did make deliveries, they were accompanied by a team of 2--3 enumerators and a supervisor, and their interaction with participants was kept to a minimum. See Appendix \ref{sec_ethics} for discussion of the ethical considerations.} 

We helped participants understand the randomization scheme using a practice round. Mimicking the main hiring choices, participants made 4 binary choices between items worth under Rs. 5, with enumerators using scratch cards to implement one choice. We asked comprehension checks before the practice and main hiring rounds. The high success rates (\input{../../outputs/stats/practice_check_correct} correct on the first attempt for the practice round, \input{../../outputs/stats/hiring_check_correct} for the main round) indicated good understanding.

%A practice round familiarized participants with the randomization scheme. 

%To ensure the participants understood the randomization scheme, they first took part in a practice round, in which they made a series of 4 binary choices between items worth less than Rs. 5. Mimicking the main hiring elicitation, the enumerators used a scratch card to select which of the 4 choices was actually implemented. We also asked a series of comprehension checks before the practice round and main hiring round, and re-explained to respondents if they answered incorrectly. Participants responded correctly to these questions the first time they were asked \input{../../outputs/stats/practice_check_correct} of the time in the practice round, and \input{../../outputs/stats/hiring_check_correct} in the main hiring round, suggesting a high level of comprehension.
%\footnote{One concern with the randomization scheme was the risk of participants mistakenly believing that it was incoherent to select a transgender candidate for a second time having already done so once, e.g., because they think a transgender person is "already" doing a delivery. To try and prevent this logic, in the practice round, some items were included twice across different choices. If the participant selected an item the first time they saw it, and then avoided it the second time, we explained that they should treat each pair as a new choice and avoid thinking about previous choices when making their decision. In the outcome round, the correlation between selecting a transgender candidate the first and second time the participant saw one was positive and relatively large ($\rho$ = \input{../../outputs/stats/cor_trans_12.tex}, 95\% CI: [\input{../../outputs/stats/cor_trans_12_low.tex}, \input{../../outputs/stats/cor_trans_12_high.tex}]), suggesting that the participants' desire for diversity in their selection was not a first-order driver of their choices.}
	
	
	
	
%	Items - randomized 60\%, 30\%, 10\% (R1 and R2)
%	Number of pairs with each gender in all rounds
%	Left / right was randomized
%	Order of pairs was randomized
%	Reliability score diff (60\%, 30\%, 10\%)
	
	
	
	
%	In the other cases, the randomization will select pairs that include members of the team of enumerators as workers.  Randomization is stratified by participant gender and survey team.
	
%	Rs. \input{../../outputs/stats/hh_exp_daily_med.tex}

	
	
	
	
	\subsection{Treatments}
	
	\subsubsection{Discussion arms}
	
	
	The first four hiring choices (the \textit{treatment round}) varied how participants communicated about their decisions. Two of these four choices included a transgender worker. Groups were randomly assigned to one of four conditions, with randomization stratified by participant gender and survey team:

%To measure the effects of horizontal communication (a group discussion), I varied the elicitation process for the first 4 hiring choices (the \textit{treatment round}). In this round, 2 of the 4 pairs included a transgender worker. 


%In the treatment round, groups were randomized into one of the four conditions described below. Randomization was stratified by participant gender and survey team.%\footnote{It was especially important to stratify by survey team because the survey team that carried out the primary survey affected the set of photos a participant saw in the delivery options. This was because most of the worker profiles shown were survey enumerators that also elicited the primary survey, and I excluded the team that carried out the primary survey from the set of worker profiles. For example, if a participant was being surveyed by someone in team 1, I excluded all of team 1's photos from the hiring profiles, and only included the photos from team 2, 3, 4, etc. Since the set of worker photos varied across participants due to team allocation, the randomization was designed to minimize any differences across treatments in the team allocation.} 
	\begin{enumerate}
	\item   \textbf{3-person discussion} ($N$=890). 3 respondents discussed their delivery and worker preferences, and then made \textit{joint} choices (see more detail in Section \ref{sec_discussion_design_main_text}).
	\item   \textbf{2-person discussion} ($N$=549). Two randomly selected ``\textit{speakers}'' discussed and chose together, while the third (the ``\textit{listener}'') observed silently and listened to the speakers' choices and justifications. This mechanism arm tested whether the effects were driven by interpersonal influence, or whether active participation was necessary. %{\color{red}Do I ref the self-persuasion lit somewhere??, or should I refer to it again???}
	
	
%	 while the 3rd participant was a "\textit{listener}". The listener didn't take part in the discussion, but observed what the others said and chose. They were asked to be silent during the discussion. The listeners were included in order to measure the effect of hearing all the choices and justifications made by others, without actively taking part in a discussion.
	
	
	
%	Comparing the listeners to the observers was designed to measure the effect of hearing all the narratives and justifications that are raised in a discussion. And comparing the listeners to the speakers was designed to measure the effect of actively taking part in, rather than just listening to, a discussion.
	\item \textbf{No discussion (public)} ($N$=599). Participants made silent individual choices, knowing that their choices would be later announced to others in their group. This mechanism arm exogenously increased social image concerns, and evaluated whether this alone affected participants' choices. I also varied the timing of the announcement to test whether observing others selecting a transgender worker could affect subsequent discrimination, and how this compared to listening to a discussion. Two randomly selected ``\textit{observers}'' learned others' choices before making their private outcome-round decisions, while the third (``\textit{non-observer}'') learned others' choices only afterward.\footnote{Participants were not told about the distinction between \textit{observers} and \textit{non-observers} until after the end of the treatment round, in order to avoid this affecting treatment round choices.} The observer condition was designed to test whether changes in the perceived group norm of discrimination were sufficient to affect discrimination.
	
		
	
%	This arm was a mechanism treatment designed to evaluate how participants' choices were affected by social image concerns in the absence of a discussion. In addition, I varied the timing of the announcement. This allowed me to see if simply being told that another person had selected a transgender worker could reduce subsequent discrimination, and how this compared to hearing a discussion about a transgender worker. 2 randomly-selected participants out of 3 (the ``\textit{observers}") were told others' choices \textit{before} making their private outcome-round choices, allowing me to measure the persuasive effect of observing others' choices. The 3rd participant (the ``\textit{non-observer}") was only told \textit{after} making their private outcome-round choices. %\footnote{This 3rd participant was included to measure whether a participants' own choices in the treatment round had a causal effect on their choices in the outcome round, for example, through a desire to act consistently.}
	


  \item \textbf{No discussion (private)} ($N$=1365). Participants made choices individually and privately, serving as the control condition.
  
\end{enumerate}

Enumerator observations indicate that participants correctly followed the protocols.%\footnote{}





\subsubsection{Design of the discussion}
\label{sec_discussion_design_main_text}





%For the \textit{3-person discussion}, \textit{2-person discussion} and \textit{No discussion (public)} arms, participants completed the treatment round together in their group of 3. The group activity usually took around 10 minutes. The activity usually took place one of the participant's homes or in another nearby common area (e.g., the common courtyard in a tower block). For the \textit{No discussion (private)} treatment, participants were interviewed separately, out of earshot from one another. 

Treatment round activities for the three group conditions occurred with all participants together, usually in someone's home or a nearby common area like a building courtyard. These group sessions lasted about 10 minutes. In contrast, \textit{No discussion (private)} participants completed their interviews individually and out of earshot from one another.

Both discussion treatments required participants to make collective decisions for each pair. When these collective choices were selected by the scratch cards, each group member separately received the same items from the same worker. The collective choices created an incentive to actively participate in the discussion by ensuring the discussion had real stakes for participants. In contrast, participants in \textit{No discussion} treatments made individual choices.

Participants freely discussed their preferences and worked to resolve disagreements about options before coming to a conclusion about which option to select. I document the types of statements made during the discussion in Section \ref{sec_transcripts}. The enumerator leading the discussion never mentioned the word \textit{transgender} themselves to minimize demand effects and to measure only the effects of horizontal communication arising between participants (see Appendix \ref{sec_discussion_design} for the discussion script and further details).

%To minimize demand effects, and to measure only the effects horizontal communication arising between participants as a result of seeing the worker photos, t


%In the \textit{2-person} and \textit{3-person discussion}, discussion participants had to reach a \textit{collective} decision for each pair. If the scratch cards selected one of the collectively-chosen pairs, each member of the group separately received the same bundle of items from the same worker. 
%This was justified to participants by truthfully saying it would be logistically easier to organize.
%To ensure naturalism, participants were truthfully told that it would be logistically easier for us to organize the same worker to deliver the same items. 
%In contrast, those in the \textit{No discussion} arms simply selected the option they preferred individually.

%In each discussion, respondents discussed their opinion of each option, explained why they preferred one option or another, and tried to convince the group to choose their preferred option in cases of disagreement. To minimize demand effects, and ensure that the discussion was driven by horizontal communication that naturally arose \textit{between participants}, the enumerator leading the discussion never mentioned the word \textit{transgender} themselves. Instead, any discussion of transgender people was only initiated by the participant's response to a photo they saw (see Appendix \ref{sec_discussion_design} for the discussion script and further details).
	
	

		
	
	
	

	
		
	
	
%	In order to understand the mechanisms driving any effect of the discussion, two additional treatments were added during the second phase of the data collection.
	
%\begin{enumerate}



\subsubsection{Rights videos}

I cross-randomized a second set of treatments to compare the effects of discussion with those of top-down communication about minority rights. Participants viewed one of three 80--90 second videos about rights before making their hiring choices. Each video was narrated in Tamil by a local research team member (who was not shown). While most content remained constant across videos and explained consumer and worker rights for delivery services, the videos differed in one key example used to illustrate rights (Appendix \ref{sec_video_scripts} contains full scripts and further details):
%	To test the effect of top-down communication about minority rights, I cross-randomized a second set of treatments. Participants were shown one of three different videos about rights before making their hiring choices. 
%{\color{red}WHO IS SPEAKING IN THE VIDEO??}
%All videos lasted between 80 and 90 seconds, and were narrated in Tamil by a local member of the research team (who was not shown). The majority of the content was the same across all three videos, and explained consumer and worker rights in the context of delivery services, in line with the framing of the study as a market research survey for a delivery service. As treatment variation, I varied one of the examples used when explaining what ``rights" were:

\begin{enumerate}
  \item \textbf{Legal rights video} ($N$=1135). This informed participants that transgender people have legally protected rights in India: \textit{``The Supreme Court of India, the most powerful legal institution in the country, gave transgender people all the same fundamental rights as others under the Constitution of India. The law therefore gives them the right to housing, employment, and education without discrimination. All these rights that you have, they also have according to the law.''}
  \item \textbf{Rights messaging video} ($N$=1135). This argued that transgender people \textit{should} have rights but did not mention legal protections, testing whether rights-based narratives could affect discrimination without institutional backing: \textit{``Transgender people should have the same fundamental rights as others in India. They should have the right to housing, employment, and education without discrimination. All these rights that you have, they should also have.''}
\item \textbf{Control video} ($N$=1135). Participants received neutral information about voting rights: \textit{``Some people have the right to vote. If you have the right to vote, you can elect your representatives. That means you can choose who should be in power and who should make decisions on your behalf.''}
\end{enumerate}
To maximize understanding, participants were given comprehension checks and corrected if needed.
  
%  Participants were told that transgender people have \textit{legally instituted} rights in India. This video was designed to measure the effect of changing people's beliefs about the law on the level of discrimination. %Specifically, they were told:  \textit{``As another example, the Supreme Court of India, the most powerful legal institution in the country, gave transgender people all the same fundamental rights as others under the Constitution of India. The law therefore gives them the right to housing, employment, and education without discrimination. All these rights that you have, they also have according to the law."} 
  
 %Participants were told that transgender people \textit{should} have rights, but they were not told that they legally \textit{do} have those rights. This was designed to measure whether legal protection is important for reducing discrimination, or if simply communicating a narrative about the transgender rights without institutional support is sufficient. The wording was kept as similar as possible to the legal rights video:
%  \textit{``As another example, transgender people should have the same fundamental rights as others in India.
%They should have the right to housing, employment, and education without discrimination.
%All these rights that you have, they should also have."}
   
%  \item \textbf{Control video} ($N$=1135). Participants were not given information about the rights of transgender people. Instead, the video included placebo information about voting rights: \textit{``As another example, some people have the right to vote. If you have the right to vote, you can elect your representatives. That means you can choose who should be in power and who should make decisions on your behalf."}
%\end{enumerate}

%In the first video (\textit{Legal rights}), participants were told that the Supreme Court gave transgender people all fundamental rights as per the constitution of India, and that they are protected from discrimination by law. I compare this to a second video (\textit{Rights messaging}) which tells participants that transgender people \textit{should} have the same fundamental rights as others in India. This was intended to understand whether legal protection is important for reducing discrimination, or if simply communicating a narrative about trans rights without institutional support is sufficient. A third video (\textit{Control}) gave no information on the rights of transgender people, but included placebo information about voting rights. 
		%To minimize demand effects, all 3 videos were framed as being about the rights of delivery workers, and included the same content apart from a 30-second section which varied across treatments. 






%\subsubsection{Variation in stakes}
%\label{sec_high_stakes}
%
%{\color{red}CUT THIS SECTION??}
%
%%I examine whether the effects of the discussion are driven by participants engaging in cheap talk, and whether the treatment effects would be negated if the stakes were higher. 
%To examine the robustness of the results to variation in the stakes, for a subsample of 582 individuals in phase 1 of data collection, I also cross-randomized whether the participants were (truthfully) told that they would receive 1 delivery (N=288) or 3 deliveries (N=294) from the same worker. 

%The latter group were told that, based on their choices and the random selection in the hiring elicitation, they would receive 3 deliveries from the \textit{same} worker each time, and that the items they received each time would be of the same value as the items they selected. If participants choose transgender workers purely out of a desire to please the experimenter or to look good in front of their neighbors, while also bearing a utility cost each time they have to interact with a transgender person, then the participants offered 3 deliveries would discriminate more. And if experimenter demand effects drove the effect of discussions, we would expect the treatment effect to be smaller in the 3-delivery group.


%{\color{red}\textbf{ADD STUFF ON HIGH-STAKES TREATMENT VARIATION}}
	
	 
	
\subsection{Data collection phases and samples}
	
	 \begin{figure}[htbp]
		\centering
		\caption{Sample sizes and timeline}
		\includegraphics[width=0.8\linewidth]{../../outputs/figs/diagram_timeline.pdf}
		
		\label{fig_timeline}
	\end{figure}
	
%	The data collection was divided into two phases (see \autoref{fig_timeline}). In phase 1, only the \textit{No discussion (private)} and \textit{3-person discussion} arms were included. In phase 2, all 4 arms were included.

Data collection occurred in two phases (see \autoref{fig_timeline}). Phase 1 (March--April 2023) focused on the main treatment effect, comparing private choices to 3-person discussions. Phase 2 (May--July 2023) added the \textit{No discussion (public)} and \textit{2-person discussion} arms after receiving additional funding in order to understand the mechanisms behind the effects of the discussion.%\footnote{To increase power on the mechanism comparisons, I added additional sample size to the control arm in phase 2. This creates an imbalance: control observations came disproportionately from phase 2 compared to the \textit{3-person discussion} arm. All relevant analyses include phase fixed effects to account for this sampling structure. %The results are also robust to adding sampling weights that re-balance the treatment conditions (\autoref{tab_sampling_weight}).
%}

%\textcolor{red}{UP TO HERE}




%I conducted the experiment in two phases (see \autoref{fig_timeline}).  
%
%
%{\color{red}DESCRIBE MAIN DEVIATIONS HERE: "the main deviations are X, Y, Z..."}
%

When analyzing the data, I primarily use three different samples: (i) the \textit{3-person discussion sample} which measures the effect of the 3-person discussion by including \textit{No discussion (private)} and the \textit{3-person discussion} arms in both phases 1 and 2; (ii) the \textit{phase 2 sample} which uses only phase 2 and includes all treatment arms to analyze the effect of the mechanism treatments; and (iii) the \textit{video sample} that includes all data from all phases since the videos were cross-randomized across all discussion arms.

%When analyzing the data, I primarily make use of three different samples:
%\begin{enumerate}
%\itemsep-0.4em
%  \item \textbf{3-person discussion sample} includes both phase 1 and 2 of the \textit{No discussion (private)} ($N$=1365) and the \textit{3-person discussion} ($N$=890) arms. It is used to measure the effect of the 3-person discussion.
%  \item \textbf{Phase 2 sample} uses only phase 2, and includes all treatment arms. This is used to analyze the effect of the mechanism treatments relative to the \textit{No discussion (private)} arm and the \textit{3-person discussion} arm.
%%  This yields $N$=1365 for the \textit{No discussion (private)} arm and $N$=890 for the \textit{3-person discussion} arm. I control for the phase of data collection for this sample.
%%  \item \textbf{Phase 2 sample}. This uses only phase 2, and includes all treatment arms. This is used to analyze the effect of the mechanism treatments relative to the \textit{No discussion (private)} arm and the \textit{3-person discussion} arm. 
%  \item \textbf{Video sample}. Since the rights videos are cross-randomized across all discussion arms in both phases, I use all data from all phases and all discussion arms when analyzing the effects of the videos. I also control for the phase of data collection for this sample.
%\end{enumerate}

\subsection{Pre-analysis plan}

I preregistered the design of both phases, and document deviations from the pre-analysis plan in Appendix \ref{sec_preanalysis_plan}. I changed the main specification to exclude interaction terms between discussion and video treatments to ease interpretation and increase power (I also show interacted specifications in \autoref{tab_video_interactions}). The other main deviations were driven by unexpectedly low survey productivity in phase 1, which tightened budget constraints. These include: (i) dropping a mixed-video arm in which each group member saw different rights videos; (ii) delivering groceries after 2--9 weeks instead of 1 week; and (iii) collecting fewer mechanism outcomes.


\subsection{Balance checks}
%	
%Participants ($N = \input{../../outputs/stats/total_n.tex}$) were recruited from urban areas in Chennai, Tamil Nadu (see \autoref{fig_survey_locations} for survey locations). They were recruited using a number of strategies, including direct household canvassing and introductions from community leaders. The sample was restricted to individuals between the ages of 20 and 65 who could read Tamil.
%
%To allow for the group discussion, enumerators always recruited and then interviewed 3 respondents at the same time. To avoid recruitment strategies that differed across treatments, enumerators were blind to treatment status before starting the survey. This means that even the control condition were recruited as a group of 3. All 3 members of a group were always interviewed simultaneously. 
%
%To make any group activities as naturalistic as possible, all members of a group were neighbors or acquaintances that lived on the same street or within the same locality.  The group members knew each other 
%\input{../../outputs/stats/prop_neighbors_knew_each_other.tex} of the time, described each other as family or friends \input{../../outputs/stats/prop_neighbors_relations.tex} of the time, and as neighbors \input{../../outputs/stats/prop_neighbors_neighbor.tex} of the time.
%
%To avoid hierarchical relationships in which one group member might dominate a discussion, we always recruited either all-male or all-female groups, and we did not recruit multiple members of the same household in a group. The majority of the sample (\input{../../outputs/stats/prop_female.tex}) was female. The study framing was more relevant for females, since females were more likely to be responsible for managing household food expenditures in our setting, and were more likely to be the person responsible for receiving deliveries (\input{../../outputs/stats/prop_receive_delivery_female.tex}) than men (\input{../../outputs/stats/prop_receive_delivery_male.tex}).

The treatment groups were well balanced on key characteristics (Tables \ref{tab_balance_pooled}--\ref{tab_balance_phase2}), with joint $F$-tests showing no systematic differences from the control group for any treatment. As expected, given the large number of comparisons, individual variables show some statistically significant differences across treatment groups. \textit{3-person discussion} participants were more likely to have employed someone in the last 2 years (\autoref{tab_balance_pooled}), and \textit{rights messaging video} participants came from slightly larger households, with a slightly lower per capita food expenditure (\autoref{tab_balance_videos}). I use LASSO to select all controls that predict both treatment status and outcomes (\citealp{belloniInferenceTreatmentEffects2014}; see Appendix \ref{sec_lasso}), so these imbalances are unlikely to affect my results.	
	
	
	


	
	
	
	
%	In the main treatment arms, all participants in a group see the same video, yielding the following 6 arms, with approximately 300 individuals in each arm.
%	\begin{enumerate}
%		\item Control video + No Discussion
%		\item Messaging video + No Discussion
%		\item Law video + No Discussion
%		\item Control video + Discussion
%		\item Messaging video + Discussion
%		\item Law video + Discussion
%	\end{enumerate}
	
%	In an additional 7th treatment arm, containing approximately 480 individuals, one person per group are shown the \textit{Law} video, while the other two people are shown the \textit{Control} video. All groups in this arm are involved in a discussion:
%	\begin{enumerate}
%		\setcounter{enumi}{6}
%		\item Mixed videos + Discussion
%	\end{enumerate}
%	This arm provides independent variation in a participant's treatment status, conditional on the treatment status of others in their group, allowing me to identify within-group spillover effects. I use $ k \in \{1, ..., 7\} $ to denote each of the 7 treatment arms.
	
%	After the treatment round of hiring, participants carry out the outcome round of hiring, in which they make 6 more choices of which delivery worker they would like to hire. Choices in the outcome round are made individually and in private.
	
%	In total, across rounds 1 and 2, participants make 10 binary choices of delivery worker. Scratch-cards will be used to randomly select 1 of the 10 hiring choices to be implemented, so that the participant actually receives a delivery from the chosen worker 1 week later. To minimize risk to transgender workers, the randomization will be designed so that choice pairs that include a transgender worker will be selected in less than 1\% of cases. In the other cases, the randomization will select pairs that include members of the team of enumerators as workers.  Randomization is stratified by participant gender and survey team.
	

	
	
%	\paragraph{''High-stakes'' condition} Approximately 50\% of the sample in treatments 1-6 will be randomized into a ``high-stakes'' condition. In this condition, participants will be told that they will not only receive 1 delivery from their chosen worker, but instead will receive 3 deliveries from the same worker over the course of the following 2 months. The items in these deliveries will be of the same monetary value as the participant's implemented choice for the first delivery. This variation will be used to examine whether reducing the probability that participants engage in ``cheap talk'' or socially desirable behavior affects the treatment effect size. For all main analyses that do not focus on this variation, the high-stakes and low-stakes conditions will be pooled. I will separately analyze heterogeneity along these lines as a supplementary analysis.
	
%	\section{Outcomes}
	
	\subsection{Outcome and specification}
	
	The pre-specified primary outcome is participants' individual choices in the outcome round of hiring. The design thereby estimates the causal effect of the discussion on participants' \textit{post-discussion} private discrimination. The outcome round occurred after group activities had finished, with 94\% of respondents confirming that other group members could not hear their responses. While these choices were designed to be as private as possible, minimizing social image concerns, participants' choices may still have been affected by social image concerns if (i) they knew each other and would discuss choices later; (ii) they anticipated that neighbors would observe the delivery worker when the delivery took place; or (iii) enumerators observed the answers given by respondents. While I cannot rule out channel (i), for robustness I use an ``extra private'' outcome that addresses channels (ii) and (iii) (see Appendix \ref{sec_alt_mechanisms}).
%
%
%
%While these choices were designed to be as private as possible, participants' behavior may nevertheless have been affected by social image concerns because (i) participants knew each other and might ask each other what they chose, (ii) they might anticipate that neighbors would observe the delivery worker when the delivery took place, and (iii) enumerators observed the answers given by respondents. These imply that social image concerns may still play a role in the outcome round choices. While I cannot rule out channel (i), for robustness I design an ``extra private'' outcome that addresses channels (ii) and (iii) (see Section \ref{sec_alt_mechanisms}).
%
%	The outcome round choices were designed to be private. Participants who had previously been in a group setting moved to be out of earshot of one another. Accordingly, 94\% of respondents reported that others in their group could \textit{not} hear their responses in the outcome round. However, the choices were arguably not \textit{completely} private, because (i) participants knew each other and might ask each other what they chose, (ii) they might anticipate that neighbors would observe the delivery worker when the delivery took place, and (iii) enumerators observed the answers given by respondents. These imply that social image concerns may still play a role in the outcome round choices. While I cannot rule out channel (i), for robustness I design an ``extra private'' outcome that addresses channels (ii) and (iii) (see Section \ref{sec_alt_mechanisms}).


	
	
	


	
%	I therefore aim to estimate the causal effect of the discussion and the rights videos on participants' individual, private choices.
	
	
	
	
	
	
	 
	
	
	
	
	
	
	
	 
	
%	When asked after completing the outcome round, \input{../../outputs/stats/prop_others_could_hear_answers.tex} of respondents thought others in their group could \textit{not} hear their responses.
	
	
	
	
	
	
	
	
%	\footnote{} {\color{red}ADD DISCUSSION OF the fact that it's not truly private because they know each other, and can observe deliveries; talk about extra private outcome...}
	
		

%	{\color{red}What constitutes "discrimination" in this set up?? - gender penalty (holding item differences fixed...)}
	
%	\textbf{ADD PRIVACY CHECK}
%	\textbf{ADD LISTENER FIDELITY CHECK}
	
	
	
	
	
	
	
	  The outcome round included 6 binary choices, two of which included a transgender worker.
 The main specification for participant $ i $ in group $j$, making a choice for the pair of workers $ k $, is:
	\begin{equation}
		\begin{aligned}
			\label{eqn_main_spec}
			\textit{ChooseAlternative}_{ijk} &= \sum_{\tau \in \mathcal{T}} \beta_{\tau} \left(\textit{Treat}_{\tau ij} \times \textit{Trans}_{ijk}\right) + \gamma \textit{Trans}_{ijk}  + \sum_{\tau \in \mathcal{T}} \delta_\tau \textit{Treat}_{\tau ij}  \\
			& \quad \quad + \textbf{X}_{ijk}' \Gamma_0 + (\textbf{X}_{ijk}' \Gamma_1 
			\times \textit{Trans}_{ijk}) 
			+ \varepsilon_{ijk}
		\end{aligned}
	\end{equation}
%	where:
$\textit{ChooseAlternative}_{ijk} = 1$ if $i$ selects the \textit{alternative} worker in pair $k$ (who could be transgender or non-transgender), and is $0$ when $i$ selects the male \textit{benchmark} worker.
 $ \textit{Trans}_{ijk} = 1 $ if the alternative worker is transgender, and is 0 if the alternative worker is non-transgender (cisgender male or female). The alternative worker is always compared to a male benchmark worker.
$ \textit{Treat}_{\tau ij}$ is a dummy for whether $i$ is in treatment arm $\tau \in \mathcal{T}$, where the set of treatments $\mathcal{T}$ is either (i) a dummy for the 3-person discussion, (ii) dummies for each discussion-arm treatment, or (iii) dummies for each rights video.
		%I do not include interaction effects between the videos and discussion arms in the main specification, but they are shown in \autoref{tab_video_interactions} and \autoref{fig_big_summary_fig}.
	
%		It takes the value 1 for groups that take part in a 3-person discussion in the treatment round if $i$'s group $j$ takes part in a 3-person discussion in the treatment round, and 0 if they complete the treatment round individually and in private. 
%		\item 
		
%		When analyzing effects of Phase 2 treatments, I instead use a series of dummies indicating each treatment group.
 $ \textbf{X}_{ijk} $ is a vector of controls that are included in some specifications. Controls are interacted with $ \textit{Trans}_{ijk} $ to control for differences in discrimination driven by observables. The controls include stratum fixed effects, differences in items offered,  differences in reliability score, the benchmark worker's reliability score, whether the reliability score was shown, question order fixed effects, a dummy for whether the alternative worker was shown on the right, and data collection phase fixed effects. When analyzing the discussion-arm treatments, I control for the rights videos, and vice versa. I use double LASSO \citep{belloniInferenceTreatmentEffects2014} to select additional controls that predict both the treatment and outcome variables (see Appendix \ref{sec_lasso}).
		%	, including (i) the difference in number of items offered between the pair, (ii) the difference in the quality measure between the pair, (iii) $ \textit{Woman}_j $, that describes whether the comparator was a woman. 
		%{\color{red}USE LASSO TO SELECT CONTROLS??}
		%		\item $ \mu_i $ are individual fixed effects
		
		
		
%		\item $ \pi_{s(i)} $ are , which are interacted with $ \textit{Trans}_{ij} $ in this specification in order to control for stratum level differences in discrimination across all treatment groups
		%	 {\color{red}Add order fixed effects, add STRATUM fixed effects!!! - to other stuff too}
	%Standard errors will be clustered by {\color{red}\textbf{GROUP}? (and individual?)}. 
	
	Throughout the paper, I define discrimination as the reduction in the probability that a worker is chosen because they are transgender (relative to non-transgender), conditional on other characteristics of the delivery options, such as the items on offer.
	
	The main treatment effects are thus given by the coefficients $ \beta_\tau $, which describe the reduction in discrimination caused by the treatments. When interacted controls are not included, $ \gamma $ describes the control group's discrimination against transgender workers. Standard errors are clustered at the group-of-3 level. For tables in the main text, I use randomization inference to calculate $p$-values \citep{youngChannelingFisherRandomization2019}. Since I have only one primary outcome, I do not correct it for multiple hypothesis testing.

	
	
	\subsection{Mechanism outcomes}
\label{sec_mech_outcomes_main}
	
The experiment also measured several mechanisms (see Appendix \ref{sec_data} and the relevant results section for more detail on each). %Some measures were included only for a single phase of data collection, as I specify below.
	
	\textbf{Baseline measures} included questions on (i) demographics; (ii) social desirability bias based on \citet{crowne1960marlowe} (phase 1 only); (iii) proximity of relationships between group members (phase 2 only); and (iv) a persuasiveness index to measure how persuasive an individual was likely to be in a discussion (phase 2 only). 
%	\begin{enumerate}
%  \item \textit{Demographics}, such as age, religion, and marital status.
%  \item \textit{Social desirability bias} (Phase 1 only). To measure a participant's propensity to give socially desirable answers, I use a shortened version of the \citet{crowne1960marlowe} module, which has been used elsewhere in India for a similar purpose \citep{dharReshapingAdolescentsGender2022}. The questions ask whether the respondent has a number of "too good to be true" traits. 
%  \item \textit{Group composition} (Phase 2 only), a series of questions designed to measure the proximity of relationships between group members, e.g., by asking how well they know each other.
%  \item \textit{Persuasiveness scale} (Phase 2 only), a series of questions designed to measure how persuasive an individual is likely to be in a group discussion.
%\end{enumerate}
These questions were intermingled with questions about deliveries to reinforce the study's framing as market research.\footnote{I did not include baseline measures of anti-transgender attitudes or pre-treatment hiring choices. While this would have increased power and yielded insights into the relationship between baseline attitudes and group discussions, it also risked undermining the credibility of the main results for two reasons. First, asking additional questions about transgender people risked making the true purpose of the study more salient, exacerbating concerns of experimenter demand effects. This concern was especially severe for attitude questions that explicitly talk about discrimination against transgender people, contrasting with the hiring questions that are subtler and less obviously focused on discrimination. Second, evidence suggests that people prefer to act consistently with previous actions \citep{falkConsistencySignalSkills2017}, and persuade themselves to ensure their preferences align with their previous actions \citep{schwardmannSelfPersuasionEvidenceField2022}. If true, measuring baseline discrimination would anchor behavior to a pre-treatment state, and lead treatment effects to be underestimated. }

%\textbf{Video comprehension checks}. After the videos about rights were shown, we asked comprehension questions about the content of the videos, and corrected participants if they did not answer correctly.

\textbf{Treatment round choices}. Hiring choices during the \textit{treatment round} are a pre-specified secondary outcome. These allow me to examine what choices were made \textit{during} (rather than \textit{after}) the discussion.

\textbf{Group observations}. During the group activities, an enumerator who was not facilitating the discussion noted observations about the group activity (e.g., who spoke first, who spoke in favor of a transgender worker). We recorded and transcribed the audio of the 3-person discussions,\footnote{Consent for audio recordings was refused in \input{../../outputs/stats/prop_audio_consent.tex} of discussions. The discussion's effects are not significantly different for groups that refused to be recorded ($p$ of difference = \input{../../outputs/stats/p_val_audio_refused.tex}).} and translated the Tamil transcriptions to English, yielding a dataset at the (sentence $\times$ group $\times$ choice) level. The enumerator observations and audio transcripts are complementary since it was not possible to determine \textit{who} said what in the transcripts.


%\textcolor{red}{MORE ON TRANSCRIPT DATA -- how was it transcribed, at the sentence level, etc.}

%\textbf{ADD FOOTNOTE ON AUDIO CONSENT}
%\footnote{\input{../../outputs/stats/prop_audio_consent_yes.tex} groups consented to be recorded. The treatment effects of the 3-person discussion are not significantly different for groups that refused to the audio recording ($p$ of difference = \input{../../outputs/stats/p_val_audio_refused.tex}).}


%\textcolor{red}{Also need to explain: how transcript data doesn't know who said what; but enumerator observations do, so they are complementary (justify each data source and its use a bit)}


%\textcolor{red}{Say that I have transcript data for 3-person discussion only}


%\textcolor{red}{
%\textbf{DISCUSSION OBS}
%During the group activities, one enumerator facilitated the discussion, instructing the participants on what to do and prompting them to speak. 
%A second enumerator handed out sheets showing the delivery options to participants. 
%Another enumerator marked a series of observation questions about the group activity, which were pre-specified as secondary outcomes. For example, they marked who spoke first, who dominated the discussion, the main reasons participants cited in the discussion for making their choices, who spoke in favor of a transgender option, whether anyone said something positive or negative about transgender workers, and how much discussion occurred for each pair. We also recorded the audio of the discussion, and research assistants transcribed the audio and translated it into English.\footnote{Consent for audio recordings was refused in \input{../../outputs/stats/prop_audio_consent.tex} of discussions. The treatment effects of the 3-person discussion are not significantly different for groups that refused to the audio recording ($p$ of difference = \input{../../outputs/stats/p_val_audio_refused.tex}).} %\textbf{{\color{red}We transcribed the discussions, yielding data on the dynamics of the discussion (TBC)}}.
%\textcolor{red}{ADD SOMETHING ON TRANSCRIPT DATA}
%}




% for the \input{../../outputs/stats/prop_audio_consent.tex} of groups who gave consent to be recorded.


%During the group activities, one enumerator facilitated the discussion, instructing the participants on what to do and prompting them to speak. 
%A second enumerator handed out sheets showing the delivery options to participants. 
%Another enumerator marked a series of observation questions about the group activity, which were pre-specified as secondary outcomes. For example, they marked who spoke first, who dominated the discussion, the main reasons participants cited in the discussion for making their choices, who spoke in favor of a transgender option, whether anyone said something positive or negative about transgender workers, and how much discussion occurred for each pair. We also recorded the audio of the discussion, and research assistants transcribed the audio and translated it into English.\footnote{Consent for audio recordings was refused in \input{../../outputs/stats/prop_audio_consent.tex} of discussions. The treatment effects of the 3-person discussion are not significantly different for groups that refused to the audio recording ($p$ of difference = \input{../../outputs/stats/p_val_audio_refused.tex}).} %\textbf{{\color{red}We transcribed the discussions, yielding data on the dynamics of the discussion (TBC)}}.
%\textcolor{red}{ADD SOMETHING ON TRANSCRIPT DATA}


	
\textbf{Post-hiring mechanisms}. Immediately after the hiring choices, we elicited further mechanism outcomes. Pre-specified secondary outcomes were: (i) predictions about the private hiring choices of other unknown people in the study; (ii) predictions about the private hiring choices of other participants in the same group; (iii) self-reported disapproval of discrimination when presented with discriminatory scenarios; (iv) a double list experiment \citep{droitcour2004item, glynnWhatCanWe2013} measuring the proportion of people agreeing with a discriminatory statement; and (v) questions about the legal status of transgender people.
%(along with other questions about the rights of delivery workers to obfuscate the purpose of the section). 
Additional exploratory mechanisms included: (vi) beliefs about reliability (the probability that the worker will complete a delivery); (vii) hiring choices for a private grocery pick-up unobservable to neighbors (phase 2 only); 
%(viii) recall checks, in which participants were asked to recall the choices made by themselves or others earlier in the survey (phase 2 only); 
(viii) a measure of salience of the word ``transgender'' using a surprise recall task;  (ix) two measures of participants' beliefs about the purpose of the study; and (x) self-reported reasons for their hiring choices (e.g., the most important factors when making their decision).



%
%\begin{enumerate}
%  \item \textit{Reasons for choices}. Participants told us the most important factors when making their hiring decisions, and in phase 2, specific reasons for choosing each of the options they selected.
%  \item \textit{Perceived reliability}.  To measure whether there were changes in the perceived reliability of transgender workers as a result of the discussion, participants were asked to say how likely they think a certain worker was to complete a delivery if they were selected to deliver. Participants were shown two photos of workers previously shown during the hiring process: one worker was transgender, and the other was male.
%  \item \textit{Laws}. We asked participants a series of questions about their beliefs about the legal status of transgender people, along with other questions about the rights of delivery workers to obfuscate the purpose of the section.
%  \item \textit{Disapproval of discrimination}. 
%Participants were presented with two scenarios depicting instances of discrimination against transgender individuals and were asked to evaluate whether the discriminator's actions were acceptable or wrong. The first scenario involved an employer rejecting a transgender applicant for employment, while the second scenario involved a woman avoiding a transgender individual on the street. 
%
%\item \textit{private grocery pick-up choices} (Phase 2 only). Even when participants made hiring choices in private (without their neighbors listening), their choices in the main round may have been affected by social image concerns. Knowing that their neighbors might see who delivers groceries to their home, they might choose a transgender worker to signal that they are non-discriminatory to their neighbors. To evaluate whether the treatment effects were driven by social image concerns, we therefore added an outcome measure in phase 2 that was designed to be more robustly private than the main outcome. This "private grocery pick-up choice" was more private in two ways. 
%
%First, in order that neighbors would not be able to see which worker was chosen during a delivery, participants were told that they would have to pick up grocery items themselves from our office.  Specifically, they were told that they had been entered into a lucky draw to win a Rs. 5000 gift voucher which could be used to buy grocery items. The winner would have to organize getting the items by calling the worker they selected, telling the worker which items they wanted, and meeting the worker at our office to pick up the items. In order to ensure that participants anticipated some extended face-to-face contact with the worker, they were also told that they had to have a 15-minute conversation with the worker to give feedback on the process.
%
%In this round, participants saw 4 pairs of options for who they could pick up the items from, and were told that if they won the lottery we would randomly select one of their choices to organize the pickup with. 2 of the 4 pairs included a transgender worker.
%
%Second, we also adapted the elicitation process so that the enumerator giving the interview did not know what responses were given. We did not ask the respondent for their choice verbally, as in the main hiring rounds. Instead, we gave the tablet directly to the respondent, and they clicked their preferred answer. Upon clicking, the tablet would automatically skip to the next question and not reveal again the answer chosen before, making it impossible for the enumerator to know what was selected. We truthfully told respondents that enumerators wouldn't know what was selected, making the answers anonymous.\footnote{Although participants still presumably realized that their data could be used for research purposes, this elicitation nevertheless plausibly reduces the impact of social image concerns on their behavior because the salient social judge, the enumerator, would not know how they had answered.}
%  
%  The anonymity of their answers was well understood by the participants: only \input{../../outputs/stats/prop_an_check1.tex} said that their neighbors would know which options they picked, and only \input{../../outputs/stats/prop_an_check2.tex} said that the surveyor would know.
%
%
%
%%  \item  \textit{Anonymous choices} (Phase 2 only): Participants answer a second series of hiring questions that are designed to be more robustly private than the main outcome measure. Participants may not view the main hiring choices as a private choice, because (i) the enumerator asks the participant to make a choice and say it out loud, so the enumerator knows the choice that is made, and (ii) the delivery was to be carried out by delivering items to the participant's home, so the neighbors are likely to see who carries out the delivery. For both these reasons, the main hiring choices may be affected by social image concerns rather than representing a participant's true private attitudes. To evaluate whether the treatments have an effect on private choices, we ask participants a series of 6 additional binary hiring questions, which simply show two workers with no additional characteristics or details, and ask the participant to select their preferred worker. These questions are more private than the main outcome in two ways. 
%  
%%  First, the choice involves a delivery pick-up that would not be observable to neighbors. For these choices, they are told that they will be entered into a lucky draw that can be used to buy grocery items. If they win, to receive the items, they had to organize with the worker of their choice to collect the items from our office, at which time they will have to have a 15 minute conversation with the chosen worker. Since the participant was required to pick up the items from our office rather than delivered to the participant's home, there would be no direct way for the neighbors to know which worker was selected.
%
%%  Second, the question elicitation was designed to be hidden from enumerators. Instead of verbally asking a participant to report which worker she preferred, the data collection tablet was given to the participant, and she selected by clicking the tablet in silence. The tablet would automatically skip to the next question upon being clicked, and would not reveal the previous answer that was given if you went back, meaning that the enumerator would not know the answer selected.
%  
%  
%  
%  
%   
%%  In addition  
%%{\color{red}\textbf{ADD MORE HERE}}
%  \item \textit{Memory check} (Phase 2 only). Participants were asked to recall the choices made by themselves or others in earlier rounds, in order to measure how much attention they paid to their choices.
%  \item \textit{Predicted choices (community)}. Participants made incentivized predictions of the choices of others in the study whom they did not know. They were shown 3 pairs of delivery options, and truthfully told that 20 other people in the study had been shown those pairs. They had to predict how many of those 20 picked each option.  If they made the closest guess on average across all 3 pairs, they were entered into a lottery to win 3000 Rs.' worth of additional items. 2 of the 3 pairs were male-to-male comparisons; the third pair compared a male and a transgender.
%  \item \textit{Predictions about others (group)}: Participants then made incentivized predictions of the hiring choices of the other two people in their group. For each of the other two group members, they were asked to predict which option the other person chose for two pairs of delivery options. The predictions concerned the real-stakes hiring choices made by their group members in the \textit{outcome round}, meaning that they were predicting the private choices of others in their group.\footnote{When participants were making their main hiring choices, they did not know that their neighbors would later be paid for predicting their answers. This rules out concerns that they tried to make their hiring choices more predictable in order to help out their neighbors.} For each other person, one choice-pair compared a male and a male, and another compared a male and a transgender. If they correctly guessed all 4 combinations they were entered into a second lottery to win a separate prize, also worth 3000 Rs.
%  
%  
%%  {\color{red}specify about private choices of others}
%    \item \textit{List experiment}. In order to examine whether individuals' private attitudes towards transgender people outside the sphere of hiring were changed by the group discussion, we used a double list experiment \citep{droitcour2004item, glynnWhatCanWe2013}. It measured the proportion of people who agreed with the statement ``In general, if I see a transgender person, I walk away.''. Since the list experiment does not allow the enumerator or the researcher to infer \textit{which} statements the participant agrees with, it preserves the anonymity of their responses and so is less likely to be vulnerable to social desirability concerns than standard self-reported attitude questions. Enumerators read out two lists (A and B) that contained 6 non-sensitive statements about a participant's preferences. The statement about transgender people was randomly added to either list A or list B. Whether list A or list B was read first was also randomized. For each list, enumerators asked how many statements in the list they agreed with.
%  \item \textit{Salience} (Phase 1 only). We included two  recall tasks in which participants have to restate as many items as possible from a list of items, one of which includes the word "transgender". The probability of recalling the word transgender, conditional on the number of other items recalled, is used as a measure of the salience of the idea of being transgender.
%  \item \textit{Perceived purpose}: If participants wanted to please the surveyors or researchers, then those who correctly guessed the purpose of the study may have discriminated less against transgender people. For this to undermine the treatment effect estimates, participants would have to guess the purpose of the experiment more often in the discussion groups, and this would result in a positive interaction between those who correctly guessed the purpose and the discussion treatment.  I asked respondents to report their beliefs about the purpose of the study twice during the main session: immediately after the hiring choices (i.e., after eliciting the main outcome) and again at the very end of the session. I classify people as having correctly guessed the study's purpose if they said it was to measure preferences for hiring transgender individuals.
%  \end{enumerate}



\textbf{Follow-up survey}. When the delivery occurred, an average of \input{../../outputs/stats/follow_up_lag_mean.tex} days after the initial survey (SD: \input{../../outputs/stats/follow_up_lag_sd.tex} days), we conducted a short (15-minute) survey to measure the persistence of treatment effects. As a pre-specified secondary outcome, we elicited 6 more hypothetical hiring choices with new worker photos and different grocery items. We made it clear to respondents that their choices would not result in actual deliveries.

As pre-specified, I correct for multiple hypothesis testing within sets of secondary outcomes, namely for attitudes (the list experiment and discrimination disapproval measure), and for norms (the predicted choices for community and own group).
			
	
%	In the main specification, I will only use the choices from the outcome round, and not use any choices from the treatment round in the analysis. Since there is only one primary outcome, I will not adjust for multiple hypothesis testing for the discrimination measure.
	
	
	
	%Need to explain
	%\begin{itemize}
	%	\item Offered 1, 2, or 3 items
	%\end{itemize}
	
	
	%\textbf{HOW TO SPECIFY WHICH ONE IS THE COMPARATOR?}
	
	%	ANCOVA SPEC? - no don't think this makes sense..., because I need the male-male obs in 
	%	Clustering - cluster by individual
	%   Individual fixed effects...
	%   Pre-specify which one is the comparator?
	














%\textbf{Interpretation:} \textit{remember $D$ is negative}.
%Gradient wrt $n$ is higher for $Listener$, and since gradient is presumably negative it means the slope is \textit{flatter} for \textit{Listener}. 
%
%When slope of $D^{pre}$ is steeper for \textit{Listeners} than \textit{NonObs}, then we don't know whetehr the steeper gradient is due to persuasion or due to other people correlating with me beforehand more...

%Intuition - need to assume that participants don't tailor their responses to the listener any more in the discussion than in the social image arm... \textbf{Plausible given that choices are public in both cases}... 
%
%\textit{How does this link to the fact that lots more people in discussion arm choose N = 2??} \textbf{is this evidence for or against the assumption??}
%
%\textbf{And empiricially the difference seems to be about 0} - see "inferring $D_{pre}$"
%
%
%
%
%
%
%
%
%
%
%
%
%
%
%
%









		
		
		
	\section{Results}
	\label{sec_results}
	
%In this section, I present the effects of the main treatment variations.	I first present the effects of the 3-person discussion on the private hiring choices. Next, I present the effects of the rights videos. Finally, I present the effects of both treatments on the medium-run hiring choices in the follow-up survey.
		
	\subsection{Effect of 3-person discussion}
	
	
%	{\color{red}You want to be more precise here about the definition of discrimination. Is discrimination just choosing a certain group less than a different one? A more strict definition would be that people are giving up money (or experience/quality) in order not to be served by a transgender person. In such cases, i.e., focusing on choices where discrimination is possible by this definition, what fraction of people discriminate?}
	
%	\textcolor{red}{CHANGE TO COMMENT ON MAIN FIGURE}
	
	The 3-person discussion leads to large reductions in discrimination in the private choices made \textit{after} the discussion in the later outcome round (Table~\ref{tab_main} and Figure~\ref{fig_main_bar}). In the control group, \textit{No discussion (private)}, there is substantial discrimination: participants are \input{../../outputs/stats/baseline_discrim.tex} p.p. less likely to select a transgender worker than a non-transgender worker ($p$$<$0.001, Table~\ref{tab_main}, Column 1). But if participants were earlier involved in a group discussion and collective hiring decision, the probability that they choose a transgender candidate in their individual choices increases by \input{../../outputs/stats/main_treatment_effect.tex} p.p. ($p$$<$0.001). Participants in the discussion arm thus do not discriminate against transgender workers on average ($p$=\input{../../outputs/stats/treatment_discrim_p.tex}). 
	
	The treatment effect of the 3-person discussion is robust to the inclusion of controls (Column 2), to including only choices that involve a transgender worker (Column 3), and to restricting the sample to participants who did not see a video discussing transgender rights (\autoref{tab_control_vid_only}).\footnote{Cross-randomized designs that do not account for interaction terms can yield incorrect inference \citep{muralidharanFactorialDesignsModel2023}. I show interaction terms for completeness (\autoref{tab_video_interactions}), but since the results hold for participants who only saw the control video, interaction effects cannot be driving the main effects of the discussion.} 
	
	



	
	\begin{table}[htbp]
\caption{Effect of 3-person discussion on private choices in outcome round (3-person discussion sample, Phases 1 and 2)}
\label{tab_main}
\resizebox{\textwidth}{!}{
\input{../../outputs/tables/main_table.tex}
	}
	\begin{tablenotes}
\item	\footnotesize \textit{Notes}: * p $<$ 0.1, ** p $<$ 0.05, *** p $<$ 0.01. Standard errors are clustered at the group-of-3 level and are in parentheses. Randomization inference p-values are in brackets. Unit of observation is the participant $\times$ choice level. Sample includes the \textit{3-person discussion} arm and the \textit{No discussion (private)} arm, in both phase 1 and 2. Column (3) only includes choices that involved a transgender worker. 
In columns (1) and (2), the outcome is whether the \textit{alternative worker} (rather than the male \textit{benchmark worker}) was selected in the private choices in the \textit{outcome round}. In column (3), it is whether the transgender worker was selected. $\textit{Worker is trans} = 1$ when the alternative worker is transgender, and is 0 when the alternative worker is male or female. 
The specification used is given in equation~\ref{eqn_main_spec}.  Controls include stratum fixed effects; dummies for the rights videos; whether the individual was randomized into being offered 3 deliveries or 1 delivery, or was not part of this randomization; whether the alternative worker was shown on the right; phase fixed effects; and the controls selected by double LASSO (see Section~\ref{sec_lasso}). In column (2), controls are interacted with \textit{Worker is trans}, so the coefficient on \textit{Worker is trans} is not shown. Relative \# items offered is the number of items offered by the \textit{alternative} worker minus the number of items offered by the male benchmark worker. Relative reliability score is the reliability score (out of 10) of the alternative worker minus that of the benchmark worker. \textit{Reliability score is shown} is 1 when the reliability score is shown. Relative reliability score is coded as 0 when it is not shown.
\item $\dagger$ The dependent variable mean when the worker is trans in the \textit{No discussion (private)} arm indicates that the transgender worker was selected (rather than the male benchmark worker) \input{../../outputs/stats/p_choose_trans_control.tex} of the time. The mean when the worker is male or female in the \textit{No discussion (private)} arm is above 50\% because participants on average prefer female alternative workers to the male benchmark workers.
\end{tablenotes}
\end{table}

\begin{figure}[!htbp]
		
		\centering
		\caption{Effect of 3-person discussion on private choices in outcome round}
			\includegraphics[width=0.55\linewidth]{../../outputs/figs/main_graph_point.pdf}
		\label{fig_main_bar}
		\begin{tablenotes}
			\footnotesize
			\item  \textit{Notes}: This figure shows the main results from Table~\ref{tab_main} graphically. The y-axis shows the effect on the probability of choosing the \textit{alternative} worker relative to the case where \textit{Worker is non-trans} in the \textit{No discussion (private)} arm. The 90\% and 95\% confidence intervals are based on column (1) of Table~\ref{tab_main}, using standard errors clustered at the group-of-3 level.
		\end{tablenotes}
	\end{figure}
	
	
 
	
	
	

	
	
%	The main effects thus hold even for participants who do not receive any information about transgender rights, suggesting the effects are not driven by interactions between the rights videos and the group discussions.
	
	
	
	
	
	

	

	
%Relatedly, there is no direct evidence of interactions between the \textit{legal rights} video and discussions (\autoref{fig_big_summary_fig} and \autoref{tab_video_interactions}), but weak evidence of a negative interaction effect between the \textit{rights messaging} video and the discussions, suggesting that these two interventions are substitutes. Throughout the paper, I present results that control for uninteracted treatments. The coefficients on the discussion should therefore be interpreted as conditional on the distribution of the other video treatment.\footnote{Cross-randomized designs that do not account for interaction terms can yield incorrect inference \citep{muralidharanFactorialDesignsModel2023}. I show interaction terms for completeness (\autoref{tab_video_interactions}), but since the results hold for participants who only saw the control video, interaction effects cannot be driving the main effects of the discussion.} 
	



%only groups who saw the control video also hold when restricting the sample 
	
%	Recent work in econometrics \citep{muralidharanFactorialDesignsModel2023} suggests that when using cross-randomized designs, regressions that do not account for interaction terms can yield incorrect inference. In the current context, the concern is that effect of the discussion may be driven by an interaction with the videos about transgender rights shown to the participants. Empirically, however, this is not a concern: Appendix \autoref{tab_control_vid_only} shows that the treatment effect of the discussion is robust and still significant at the 0.1\% level when restricting analysis to only groups who saw the control video. The main effects of the discussion hold even when participants do not receive any information about transgender rights. \footnote{In \autoref{tab_video_interactions}, I show the full set of interactions between rights videos and discussion arms. {\color{red}\textbf{COMMENT HERE}}.}


	
	To benchmark the size of the reduction in discrimination, I use the random variation in grocery items to infer the willingness to pay to avoid a transgender worker (\autoref{fig_wtp_to_avoid}). In the \textit{No discussion (private)} arm, participants are on average willing to sacrifice items worth Rs. \input{../../outputs/stats/wtp_control.tex} (approx. 6.30 USD PPP) to avoid selecting a transgender worker, corresponding to \input{../../outputs/stats/wtp_control_hh_exp.tex}$\times$ the median daily per capita food expenditure in the sample. By contrast, in the \textit{3-person discussion} arm, the willingness to pay to avoid a transgender worker is Rs. \input{../../outputs/stats/wtp_treat.tex} ($p$ of difference $<$ 0.001), and is no longer significantly different from 0 ($p$=\input{../../outputs/stats/wtp_treat_p_val.tex}). 
	%In Appendix \ref{sec_wtp_gmm}, I structurally estimate a model that allows for preferences to be correlated within participants and groups, and comes to similar conclusions.
%	\footnote{The model allows for distaste against transgender workers and correlation of distaste within groups. I estimate the willingness to pay to avoid a transgender person to be \input{../../outputs/stats/mu_control.tex} Rs. in the \textit{No discussion (private)} group, while in the discussion arm there is estimated to be a small preference in \textit{favor} of transgender workers of \input{../../outputs/stats/mu_treat.tex} Rs.}
	 The results therefore suggest that the discussion generates a large short-run reduction in discrimination.
	
	The effect size is similar when examining only \textit{costly} discrimination, i.e., when participants avoid a transgender worker who offers more items, has a higher reliability score, or both (\autoref{tab_dominates}, column 1).\footnote{I do not find evidence of an implicit preference against transgender workers, in which they are discriminated against mostly when there is plausible deniability (e.g., when other attributes are different) \citep{de2022implicit}. Transgender workers are not disproportionately favored when their attributes are the same as those of the benchmark worker, compared to when they are dominated or dominating (\autoref{tab_dominates}).}
	In the \textit{No discussion (private)} arm, even when shown a transgender worker who dominates on items or reliability score, or both, participants still select the non-transgender worker \input{../../outputs/stats/prop_when_dominates.tex} of the time. By contrast, in the \textit{3-person discussion} arm, this figure falls to \input{../../outputs/stats/prop_when_dominates_treat.tex} (difference: \input{../../outputs/stats/coeff_when_dominates_treat.tex} p.p., $p$\input{../../outputs/stats/p_val_only_dominates.tex}). The discussion also increases the probability of selecting a transgender worker when they offer fewer items or have a worse reliability score (\input{../../outputs/stats/coeff_when_dominated.tex} p.p., $p$\input{../../outputs/stats/p_val_only_dominated.tex}), implying that it also increases \textit{positive} discrimination in favor of transgender workers. 
	
There is heterogeneity in levels when separating the analysis of non-transgender workers into males and females (\autoref{fig_gender}).\footnote{Female workers were the most preferred gender in both treatment conditions and were selected \input{../../outputs/stats/p_choose_female.tex} of the time over the \textit{benchmark choice} (who was always male). Male workers, always being compared to other males, were mechanically selected around 50\% of the time. Transgender workers, however, were selected \input{../../outputs/stats/p_choose_trans_control.tex} of the time in the \textit{No discussion (private)} arm, but \input{../../outputs/stats/p_choose_trans_treat.tex} in the \textit{3-person discussion} arm. This implies that males were preferred to transgender people in the control condition ($p$\input{../../outputs/stats/p_val_trans_vs_men_control.tex}), but transgender people were preferred to males in the treatment condition ($p$\input{../../outputs/stats/p_val_trans_vs_men_treat.tex}).} Appendix~\ref{sec_appendix_het} shows further heterogeneity results, indicating that (i) participants trade off their preferences between items and worker; (ii) participants statistically discriminate against transgender workers, but the discussion's effects are not driven by this; and (iii) the discussion reduces discrimination by the same amount for male and female participants.
	
%	{\color{red}Possibly - talk about model estimation here}. 
	
%	This implies that participants are on average willing to sacrifice 
%	
%The gradient of the probability of selecting an option with respect to the difference in value of the items on offer across a pair 
	
%	I take the reduction in probability that an option is chosen when the worker is transgender 	
	
%	Specifically, I take the reduction in probability that an option is chosen when they are transgender, and divide it by the gradient of selecting an option with respect to the 
	
	
%	(\autoref{fig_wtp_to_avoid}). In the \textit{No discussion (private)} arm, participants on average sacrifice items worth Rs. \input{../../outputs/stats/wtp_control.tex} to avoid selecting a transgender worker, corresponding to \input{../../outputs/stats/wtp_control_hh_exp.tex}x the median daily per capita food expenditure in the sample. 
	



%\textbf{Implication --- }




%No effect on women - to men choice

%{\color{red}This para is too long:}



%{\color{red}As noted above, in the \textit{3-person discussion} arm, there is no significant difference between the probability of selecting a transgender and a non-transgender worker. However, this disguises heterogeneity when separating the analysis of non-transgender workers into males and females .  
%\footnote{This is partly driven by the high proportion of female participants (\autoref{tab_balance_pooled}), along with female participants' particular preference for female workers (\autoref{fig_gender_by_gender}).} 
   %The main results in \autoref{tab_main} and \autoref{fig_main_bar} pool the probabilities of selecting a male or female alternative worker instead of the male benchmark. This pooling yields the result that in the \textit{3-person discussion} arm, the probability of selecting the alternative worker was the same (approximately 60\%) for both transgender and non-transgender workers.
 





%\textbf{COSTLY DISCRIMINATION}



%{\color{red}ADD BACK IN SOMEWHERE HERE DOMINATED VS DOMINATING ***}

%{\color{red}You want to be more precise here about the definition of discrimination. Is discrimination just choosing a certain group less than a different one? A more strict definition would be that people are giving up money (or experience/quality) in order not to be served by a transgender person. In such cases, i.e., focusing on choices where discrimination is possible by this definition, what fraction of people discriminate?}


%Pooling males and females together yields the result that in the \textit{3-person discussion} arm, the probability of selecting the  

%there was no significant difference between the probability of selecting a transgender and a non-transgender worker 


%implying that they were selected less often than the male benchmark worker. But in the \textit{3-person discussion} arm, transgender workers were selected \input{../../outputs/stats/p_choose_trans_treat.tex} of the time, implying that they were on average preferred to the male benchmark worker. 

%were on average p

% Males were selected around 50\% of the time when compared to their male benchmarks, validating the balanced randomization of photos and characteristics.
%  In the \textit{No discussion (private)} arm, participants selected transgender workers less often than the male benchmark worker, resulting in an overall mean of \input{../../outputs/stats/p_choose_trans_control.tex}. {\color{red}But in the \textit{3-person discussion} arm, transgender workers were on average preferred to the male benchmark worker, resulting in them being selected \input{../../outputs/stats/p_choose_trans_treat.tex} of the time. Pooling the results for males and females together yields the result that in the \textit{3-person discussion} arm, there is no significant difference between the probability of selecting a transgender worker and a non-transgender worker (\input{../../outputs/stats/p_choose_non_trans_treat.tex}).}

%\textcolor{red}{Say something here about the fact that positive discrimination increases and negative discrimination decreases (simultaneously) - shift in distribution...}


%\subsection{Extra bits}


%
%
%Despite transgender workers having the same average reliability score as other genders, 
%
%
%
%
%
%
%
%
%
%Reliability scores are the same...
%
%I cannot adjudicate whether participants beliefs are correct
%
%
%(\autoref{tab_attitudes_beliefs_norms}, panel A, column 3; discussed below).
%
%
%
%
%{\color{red}ADD CITE on alex imas innacurate statistical discrimination}
%The negative stereotypes that portray transgender workers as unreliable or untrustworthy might make participants unwilling to select trans workers because they fear that the delivery won't be completed. {\color{red}Describe that transgender particiapnts are perceived as being less reliable at baseline}
%To test this hypothesis, half of the choice-pairs were randomly selected to include information about the reliability of both workers. 
%{\color{red}Clarify that trans workers are not actually lower quality...}



%.\footnote{Participants also react modestly to the \textit{value} of the reliability score. 

%They are \input{../../outputs/stats/reliability_sensitivity.tex} percentage points more likely to select an option for each additional point on the 10-point scale (\autoref{tab_main}, columns 2 and 3).} 
%The discrimination trans people face is thus at least partly belief-based, fitting with the results showing that transgender workers are rated as less likely to complete the delivery (\autoref{tab_attitudes_beliefs_norms}, panel A, column 3; discussed below).

%The discrimination transgender workers face seems to thus be at least partly belief-based, in line with the results discussed below that transgender workers are rated as less likely to complete the delivery (). 
%I do not have enough power to detect whether the 3-person discussion reduces this belief-based component of discrimination (\autoref{tab_statistical_discrim}, column 2). While the point estimate of the interaction of (\textit{Worker is trans} $\times$ \textit{Reliability score is shown} $\times$ \textit{3-person discussion}) is negative and large enough to negate the effect of (\textit{Worker is trans} $\times$ \textit{Reliability score}), I cannot reject that it is different from 0 ($p$ = \input{../../outputs/stats/p_val_effect_on_statistical_discrim.tex}).






%\textcolor{red}{Finally, the discussion consistently reduces discrimination across all 13 transgender worker photos used (\autoref{fig_treat_by_photo}). This suggests that other features of the worker photos do not drive the results. For example, suppose the transgender workers tended to appear to be poorer, and the discussion actually increased preferences for choosing poor people. We would nevertheless expect at least \textit{some} transgender workers to appear richer, leading to a coefficient estimate in the opposite direction for a subset of photos -- something that is not observed empirically.}




%{\color{red}CUT THIS PARAGRAPH - just say that treatment effect is same for men and women.}
%Finally, I evaluate whether the treatment effect varies according to participant demographics. The reduction in discrimination is similar across most demographic groups , including by gender, education, and household size (\autoref{tab_het_demo}). However, the 3-person discussion had a significantly larger effect among the 23\% of participants who had hired someone to work for them over the last 2 years  ($p$ = \input{../../outputs/stats/pval_employer.tex}).\footnote{There is also no detectable heterogeneity in the treatment effect with respect to either the persuasiveness of the group discussants or the relationships between group members (\autoref{tab_het_group}).} Hiring discrimination was thus reduced the most for the population of people who have  hired recently, providing suggestive evidence that the results could generalize to other employment scenarios.\footnote{In Appendix \ref{sec_types_of_discrim}, I also show suggestive evidence that reductions were fairly uniform across the distribution of baseline attitudes. First, I show that the discussion simultaneously increases positive discrimination in favor of transgender workers (when transgender workers are selected despite offering worse items and having worse characteristics), and \textit{also} decreases negative discrimination (when transgender workers are not selected despite better items and characteristics). Second, I show that the number of times a transgender was selected in the outcome round in a group increases across the whole distribution of groups. This contrasts with previous literature on group dynamics, which suggests that groups can lead to increased clustering at extreme behaviors (i.e., everyone discriminating or everyone selecting transgender workers) \citep{stonerComparisonIndividualGroup1961,myersGroupPolarizationPhenomenon1976a, schroederRiskyShiftGeneral1973, myersDiscussionEffectsRacial1970}.}  




%The reduction in discrimination caused by the discussion is similar across multiple demographic groups.



%The effect of the discussion is not significantly mediated by whether the participant was female, had a bachelor's degree, had children in the household, had a larger than median household, or had an above-median food expenditure (\autoref{tab_het_demo}). By contrast, the 3-person discussion had a significantly larger effect among the \input{../../outputs/stats/perc_employer.tex} of participants who had hired someone to work for them over the last 2 years ($p$ = \input{../../outputs/stats/pval_employer.tex}), but a significantly lower treatment effect for those who were employed ($p$=\input{../../outputs/stats/pval_employed.tex}). Hiring discrimination was thus reduced the most for the population of people who have  hired recently, increasing the likelihood that the results generalize to other employment scenarios. 











%- statistical discrimination - revealing the reliability score increases the probability of selecting a transgender worker by xxxxx, but not of other non-transgender 

%- discussion - not sufficiently well-powered to detect whether the discussion has an effect on this statistical discrimination... effect on this statistical discrimination factor is negative






			
		
	
	
	

\subsection{Effect of transgender rights videos}


	

%To compare the effects o

%{\color{red}Need some intro / link to discussion part, e.g., "what happens when shock to laws / norms"?}
The videos about transgender rights reduce discrimination by significantly less than the discussion (\autoref{tab_videos}). Both the \textit{Rights messaging} video and the \textit{Legal rights} video significantly increased the probability of selecting a transgender worker in the outcome round ($\beta$=\input{../../outputs/stats/effect_messaging_video.tex} p.p. and \input{../../outputs/stats/effect_law_video.tex} p.p., $p<0.01$). There is some evidence that the legal rights video has a stronger effect than the rights messaging video ($p \in$ [\input{../../outputs/stats/p_val_min_video_messaging_vs_law.tex}, \input{../../outputs/stats/p_val_max_video_messaging_vs_law.tex}], depending on the specification).
%
Endorsing transgender rights thus appears to reduce discrimination somewhat more effectively when it is backed by the legal authority of the Supreme Court. This implies that the law can be an important tool for reducing societal discrimination, and that raising awareness of the legal rights of minorities may be an underrated policy lever for addressing discrimination. However, the effect of top-down communication about the law is only \input{../../outputs/stats/effect_law_vs_discussion.tex} as large as the effect of the group discussion ($p$ of difference $\in [\input{../../outputs/stats/p_val_vid_vs_discussion.tex}]$). %
%This has important implications for the power of the law to address societal discrimination.%
Appendix~\ref{sec_appendix_videos} shows further results on the rights videos, including (i) a manipulation check showing that the \textit{Legal rights} video affects people's beliefs about transgender rights; (ii) the interactions between the rights videos and the discussion; and (iii) how the rights videos affected the mechanism outcomes. 

%


%The rights videos operate by affecting participants' beliefs about norms and the reliability of transgender people, but not through private attitudes. 




%we might expect the results to be driven by those with a high social desirability score. Nevertheless, this result leaves open the possibility that the treatment effects of the videos are driven by response bias, namely, the desire to respond in a socially desirable way in front of the surveyor. This seems unlikely to be driving the full effect of the legal rights video, however, since the strong effect of the legal rights video also holds on the private grocery pick-up choices, which were hidden from the enumerator (\autoref{tab_videos_anon_choices}).


%{\color{red}ADD something about p(Rights video = Discussion) somewhere?}

% examine the interactions between the rights videos and the discussion-arm treatments. 

\begin{table}[htbp]
\caption{Effect of rights videos on private choices in outcome round}
\label{tab_videos}
\resizebox{\textwidth}{!}{
\input{../../outputs/tables/videos.tex}
}
\begin{tablenotes}

\item	\footnotesize \textit{Notes}: * p $<$ 0.1, ** p $<$ 0.05, *** p $<$ 0.01. Standard errors are clustered at the group-of-3 level and are in parentheses. Randomization inference p-values are in brackets. Unit of observation is the participant $\times$ choice level. Sample includes all participants in both phases, in all discussion-arm treatments. Controls include dummies for the discussion-arm treatments. The specifications are otherwise the same as \autoref{tab_main}. 

%Column (3) only includes choices that involved a transgender worker. 
%In columns (1) and (2), the outcome is whether the \textit{alternative worker} (rather than the male \textit{benchmark worker}) in the private choices in the \textit{outcome round}. In column (3), it is whether the transgender worker was selected. $\textit{Worker is trans} = 1$ when the alternative worker is transgender, and is 0 when the alternative worker is male or female. 
%The mean of the dependent variable when the worker is trans and in the \textit{No discussion (private)} arm indicates that the transgender worker was selected (rather than the male benchmark worker) \input{../../outputs/stats/p_choose_trans_control.tex} of the time. The mean when the worker is male or female in the \textit{No discussion (private)} arm is above 50\% because participants on average prefer female alternative workers to the male benchmark workers.
%The specification used is seen in equation \ref{eqn_main_spec}.  Controls include stratum fixed effects; dummies for the discussion-arm treatments; whether the individual was randomized into being offered 3 deliveries or 1 delivery, or was not part of this randomization; whether the alternative worker was shown on the right; phase fixed effects; and the controls selected by double LASSO (see Section \ref{sec_lasso}). In column (2), controls are interacted with \textit{Worker is trans}, so the coefficient on \textit{Worker is trans} is not shown. Columns (2) and (3) also include controls for the relative \# items offered by the alternative worker, the relative reliability score of the worker, and a dummy for whether the reliability score was shown. randomization inference \textit{p}-values at the base of the table test for differences between treatment effects across treatment arms, i.e., for differences in the interacted terms in columns (1) and (2), and differences in the uninteracted terms in column (3).
\end{tablenotes}
\end{table}

%\clearpage
 


%\textcolor{red}{ADD COMMENTARY ON INTERACTION EFFECTS IN FIGURE / SUMMARY FIGURE}



%\subsubsection{Mechanisms}



%The videos have a significant effect on perceived descriptive norms of discrimination (\autoref{tab_videos_norms}), in line with the \textit{expressive law hypothesis} \citep{benabouLawsNorms2011, sunsteinExpressiveFunctionLaw1996, mcadamsNormsLawEconomics2004, laneLawNormsEmpirical2019}, which states that the law can affect behavior by signaling the prevailing social norm. After seeing either treatment video, participants predict that others will select will select transgender workers more, both in the wider community (2--3 p.p.), and in their group of 3 (4--6 p.p.). 



%\textcolor{red}{Make sure I reference to Appendix \ref{sec_data} somewhere for design of intermediate outcomes}


%\textcolor{red}{REJIGGLED SECTION ORDER - shorten this section - very short, not a subsection???}

%While the discussion leads to changes in perceived norms and attitudes, the effects of the rights videos are mediated by changes in perceived norms and beliefs about reliability. The videos have a significant effect on perceived descriptive norms of discrimination (\autoref{tab_videos_norms}), in line with the \textit{expressive law hypothesis} \citep{benabouLawsNorms2011, sunsteinExpressiveFunctionLaw1996, mcadamsNormsLawEconomics2004, laneLawNormsEmpirical2019}, which states that the law can affect behavior by signaling the prevailing social norm.  After seeing either treatment video, participants predict that others will select will select transgender workers more, both in the wider community (2--3 p.p.), and in their group of 3 (4--6 p.p.). For community-wide norms, the effect of the legal rights video is similar to the effect of the group discussion ($p$=\input{../../outputs/stats/p_norm_community_diff_vid_discuss.tex}). But for group-level norms, the discussion has a much stronger effect ($p$\input{../../outputs/stats/p_norm_group_diff_vid_discuss.tex}). This hints that the larger effects of the discussion may be mediated by the effects on group norms.



%The videos also lead to small increases of around \input{../../outputs/stats/effect_belief_vid.tex} p.p. (\input{../../outputs/stats/effect_belief_vid_perc.tex}) in the probability that a participant reports that a transgender worker is likely to complete the delivery (Appendix \autoref{tab_videos_attitudes}, column 3). By contrast, neither video has a detectable effect on attitudes as measured by the list experiment or the questions on disapproval of discrimination (Appendix \autoref{tab_videos_attitudes}, columns 1-2). 

%Participants in the discussion often say that one should choose a transgender in order not to discriminate, similar to the video that says transgender persons should have rights. By contrast, we did not record any incidences of participants directly mentioning the legal rights of transgender people in the discussion in any video arm. The \textit{legal rights} video therefore provides additional informational content that is not a perfect substitute for being told by a group member that one should not discriminate.\footnote{I also cannot rule out other non-mutually-exclusive explanations. There might be a substitution effect between the change in attitudes and perceived norms generated by both types of rights videos, and the group discussion. The \textit{legal rights} video simply continues to have an effect in the discussion conditions because it generates a larger initial shock to attitudes and norms, and, in line with that, more pro-trans discussion (\autoref{fig_reasons_r1_by_video}). This outweighs the natural substitution effect, whereas for the \textit{rights messaging} video the effect is not enough to outweigh the substitution effect. The \textit{legal rights} video may also operate through channels of \textit{social cover}, in line with \citet{bursztynJustifyingDissent2023}. Both types of treatment videos generate changes in second-order beliefs; people believe that others are more trans when they see either treatment video. However, participants do not know if others in their group know what video they saw, and being told about actual legal rights may be more likely to create the impression of \textit{common knowledge} of the law, because it is a societal fact that others plausibly might have known before.  If a participant thinks that others believe that she knows about the law, then she might face additional social pressure in the discussion to behave in a pro-transgender way. 
Just anticipating others to be more pro-trans based on the rights messaging video may not be sufficient to generate complementarity between the discussion and video that is enough to outweigh the substitution effects.}




%There are at least two plausible explanations for this pattern (which are not mutually exclusive). 

%\textit{Natural substitution effect- due to norm shift or attitude shift having similar effects...}


%\textit{Informational content of the rights messaging videos - very similar to narratives used in the discussion}





% plausibly more likely to create the impression of \textit{common knowledge} of the law: a participant might anticipate that others know, and others know that she know. 


%i.e. people anticipate that others are persuaded to change their behavior by the video. 

%\footnote{We tell participants that different people in the study are shown different videos, which will tend to reduce the effect of this \textit{common knowledge} channel on behavior. But people may believe that others know about the law regardless of whether they saw the video.} If the video explaining about the legal rights of transgender people signals that there is \textit{common knowledge} of a pro-trans norm, e.g., because others knew about the law even before watching the video, then this may make them more comfortable expressing pro-trans opinions in a group compared to the rights-messaging video. This would be one reason for the increases in pro-social explanations for choosing transgender people in the discussion for groups that saw the legal rights video. (\textbf{CITE e.g., Patricia Funk study??  Also: write better: law doesn't just persuade others that saw it, it also signals that there may be common knowledge of a social norm. Does this fit with stuff on the community-level norms - not really)}


 
%
% may more credibly signal a widespread prevailing norm 
%
%
%
%
%
%- partially mediated by group norms? (While rights messaging has effect on group predictions in control group, it has an effect on group predictions in discussion groups). 
%
%Credible signal of social norm?
%
%
%
%It changes my mind, but I don't think it changes other people's minds??
%
%\textit{COMMON KNOWLEDGE} - 
%law signals a widespread social norm, so everyone thinks that everyone thinks everyone is more pro-trans (3rd-order belief)
%
%- that would make discussion more positive in law case...?
%
%
%whereas because we try and shut down the common knowledge case by saying everyone saw different videos; you might think others are persuaded but you don't know
%
%
%
%
%
%
%
%{\color{red}Why would this be the case? Kind of unclear}
%
%law = credible signal of social norm that is enhanced by observing poeple be pro-trans... (in line with e.g., Patricia Funk etc.)
%
%
%Activism - I interpret others pro-trans behavior as being just result of message , rather than them really changing attitudes?
%
%Law - I interpret others pro-trans behavior as being result 






%\textbf{NEXT: video interactions}




%both lead to small increases in the predicted probability that someone in the participants' community will select a transgender, and the legal rights video significantly increases the predicted probability that someone in 
%($p=$\input{../../outputs/stats/p_effect_placebo_beliefs.tex} for the messaging video, and $p=$\input{../../outputs/stats/p_effect_law_beliefs.tex} for the legal rights video). 



\subsection{Persistence after 2--9 weeks}
\label{sec_persistence}

%\autoref{tab_follow_up} {\color{red}COMMENT ON RESULTS}

%{\color{red}Add notes to tab folloow up}

To examine whether the effects of the discussion and the rights videos persist over the medium run, I ran a follow-up survey when the delivery was carried out. This survey took place an average of \input{../../outputs/stats/follow_up_lag_mean.tex} days after the initial survey (SD: \input{../../outputs/stats/follow_up_lag_sd.tex} days). \input{../../outputs/stats/attrition_perc.tex} of the sample were found, and there was no evidence of differential attrition (\autoref{tab_diff_attrition}). For the follow-up, discrimination was measured using 6 hypothetical hiring choices designed to be as similar as possible to the main hiring choices. All of these questions were asked individually and in private. The questions used a new set of worker photos and new types of grocery items. 

%{\color{red}Comment on control level of discrimination}

%{\color{red}$tab_spillovers$}

%{\color{red}Add stat on correlation between hypothetical vs earlier real choices...}




\begin{table}[htbp!]
\centering
\caption{Medium-run effects of discussions on hypothetical hiring choices (2--9 weeks)}
\label{tab_follow_up_a}
%\small Effect of 3-person discussion (3-person discussion sample, phases 1 + 2)
\centering
\vspace{-1em}
\normalsize
\resizebox{\textwidth}{!}{
\input{../../outputs/tables/follow_up.tex}
}

\footnotesize
\begin{tablenotes} \scriptsize
\item \textit{Notes:} * p $<$ 0.1, ** p $<$ 0.05, *** p $<$ 0.01. Standard errors are clustered at the group-of-3 level and are in parentheses. Randomization inference p-values are in brackets. Sample includes the \textit{3-person discussion} arm and the \textit{No discussion (private)} arm, in both phases 1 and 2. Controls include dummies for the rights videos, as well as the other controls specified in Table \ref{tab_main}. In the follow-up survey, workers in a pair always had the same reliability score and offered the same number of items. Specification is otherwise the same as Table \ref{tab_main}. As prespecified, for analysis, I drop the \input{../../outputs/stats/prop_purple.tex} of the sample who were randomly selected to actually receive a delivery from a transgender worker.
\end{tablenotes}
\end{table}



%
%	\begin{table}[htbp!]
%	\centering
%
%	\caption{Medium-run effects of discussions and videos on hypothetical hiring choices (2-9 weeks)}
%	\label{tab_follow_up}
%\small \textbf{Panel A:} Effect of 3-person discussion (3-person discussion sample, phases 1 + 2)
%	\centering
%\vspace{-1em}
%
%	\normalsize
%		\begin{subtable}[t]{0.9\textwidth}
%\centering
%\resizebox{\textwidth}{!}{
%\input{../../outputs/tables/follow_up.tex}
%}
%		\end{subtable}
%	\vspace{1em}
%	
%
%\small \textbf{Panel B:} Effect of transgender rights videos (all participants)
%		\centering
%\vspace{-1em}
%
%	\normalsize
%		\begin{subtable}[t]{0.9\textwidth}
%	\resizebox{\textwidth}{!}{
%		\input{../../outputs/tables/follow_up_video.tex}
%	}
%		\end{subtable}
%	\footnotesize
%	
%	\begin{tablenotes} \scriptsize
%	\item \textit{Notes:} * p $<$ 0.1, ** p $<$ 0.05, *** p $<$ 0.01. Standard errors are clustered at the group-of-3 level and are in parentheses. Randomization inference p-values are in brackets. Sample in panel A includes the \textit{3-person discussion} arm and the \textit{No discussion (private)} arm, in both phases 1 and 2. Sample in panel B includes all participants.
%%	Unit of observation is the participant $\times$ choice level.
%	Controls in panel A include dummies for the rights videos, and controls in panel B include dummies for the discussion-arm treatments, as well as the other controls specified in Tables \ref{tab_main} and \ref{tab_videos}.
%	In the follow-up survey, workers in a pair always had the same reliability score and offered the same number of items. 
%	Specification is otherwise the same as Tables \ref{tab_main} and \ref{tab_videos}.
%%
%%	
%%	
%%	 Column (3) only includes choices that involved a transgender worker. 
%%In columns (1) and (2), the outcome is whether the \textit{alternative worker} (rather than the male \textit{benchmark worker}) in the non-incentivized choices in the follow-up survey. In column (3), it is whether the transgender worker was selected. $\textit{Worker is trans} = 1$ when the alternative worker is transgender, and is 0 when the alternative worker is male or female. The specification used is seen in equation \ref{eqn_main_spec}.  ; whether the individual was randomized into being offered 3 deliveries or 1 delivery, or was not part of this randomization; whether the alternative worker was shown on the right; phase fixed effects; and the controls selected by double LASSO (see Section \ref{sec_lasso}). In column (2), controls are interacted with \textit{Worker is trans}, so the coefficient on \textit{Worker is trans} is not shown. 
%\end{tablenotes}
%%\flushright \textit{Continued on next page...}
%\end{table}






The 3-person discussion led to reductions in discrimination in these hypothetical choices that were still present after 2--9 weeks (\autoref{tab_follow_up_a}). Participants were approximately \input{../../outputs/stats/effect_discussion_follow_up.tex} p.p. more likely to select transgender workers in the hypothetical follow-up choices ($p$=\input{../../outputs/stats/p_val_discussion_follow_up.tex}). Approximately 25\% of the short-run effect thus remained after around 1 month. This is comparable to persuasion decay rates seen in the political science literature (e.g., \citet{hillHowQuicklyWe2013}, who estimate that 10--15\% of the initial effects of US TV political advertisements on voting remain after 30 days), although it is less persistent than the effects in \citet{broockmanDurablyReducingTransphobia2016} and \citet{kallaReducingExclusionaryAttitudes2020}.
By contrast, the videos about transgender rights did not lead to a detectable persistent effect on discrimination ($p\in[\input{../../outputs/stats/follow_up_video_pvals.tex}]$, \autoref{tab_follow_up_videos}). 

Since the follow-up choices are hypothetical, the results are more vulnerable to concerns about experimenter demand effects and social desirability bias. However, experimenter demand effects are likely to be substantially \textit{larger} for the rights videos (which increase the probability of successfully detecting the purpose of the experiment from 5\% to 10\%, $p<0.001$, see Appendix \ref{sec_alt_mechanisms}). The fact that there were no persistent impacts for rights videos thus suggests that the discussions' effects were not driven by experimenter demand.
The results therefore suggest that the large short-term effects of the discussion on discrimination may translate into medium-run effects. This raises the possibility that even short interventions involving horizontal within-group communication could have persistent effects on behavior.


%In line with this concern, the control group discriminated less in the hypothetical follow-up survey than in the main survey (\input{../../outputs/stats/trans_penalty_follow_up.tex} p.p., difference: \input{../../outputs/stats/diff_r2_fu.tex} p.p., $p$\input{../../outputs/stats/diff_r2_fu_p.tex}). 



%
%\textcolor{red}{There is also a risk that participants from different treatment groups communicated with each other after the main survey. However, I find no evidence of geographical spillovers (\autoref{tab_spillovers}).}



%However, the probability that participants select transgender workers in the hypothetical follow-up does positively correlate with the probability in the incentivized outcome round of the main survey ($\rho$=\input{../../outputs/stats/cor_r2_fu_estimate.tex}, $p$\input{../../outputs/stats/cor_r2_fu_pval.tex} in the \textit{No discussion (private)} arm).\footnote{This is about half as large as the correlation between treatment round and outcome round choices during the main survey ($\rho$=\input{../../outputs/stats/cor_r2_r1_estimate.tex}, $p$\input{../../outputs/stats/cor_r2_fu_pval.tex}).} The results therefore suggest that the large short-term effects of the discussion on discrimination may translate into medium-run effects, while the information about transgender rights was not sufficiently impactful to have a medium-run effect. This raises the possibility that even short interventions involving horizontal within-group communication could have persistent effects on behavior.

%\textcolor{red}{ARGUE THAT VIDEOS BOUND THE EXPERIMENTER DEMAND EFFECTS HERE}









%
%may persist in the medium-run while the 
%
%
%It is important to keep in mind that the results are based this is not a revealed-preference measure, 
%
%
%
%and so it is more vulnerable to concerns about social desirability bias.
%
%The short 10-minute discussion is therefore not sufficient to alter people's behavior over the medium-run. For such changes, more intensive and repeated interventions are likely to be necessary.











%We asked 6 more hypothetical hiring choices with a new set of worker photos, and a different set of grocery items. We made clear to respondents that these choices would not result in actual deliveries.
 
 
% The follow-up on average took place \input{../../outputs/stats/follow_up_lag_mean.tex} days after the initial survey (SD: \input{../../outputs/stats/follow_up_lag_sd.tex} days). 





%the effects of the 3-person discussion do not persist over this 2 to 9 week period. There is still discrimination against transgender people: they are  \input{../../outputs/stats/trans_penalty_follow_up.tex} percentage points less likely to be chosen in the hypothetical follow-up questions ($p$ \input{../../outputs/stats/p_val_trans_penalty_follow_up.tex}). And individuals in the 3-person discussion arm are not more likely to select transgender people in these follow-up questions ($p$ = \input{../../outputs/stats/p_val_discussion_follow_up.tex}). 












%\section{Mechanisms: Effects on attitudes, beliefs about reliability, and norms}
%\section{Intermediate outcomes: effects on attitudes, beliefs, norms, and behavior during discussion}
%\label{sec_mechs_atts}

%To understand the mechanisms that could underly the effects of both horizontal and top-down communication about transgender people, I here examine a number of intermediate outcomes, including attitudes, beliefs, norms, and behavior during the discussion (see Appendix \ref{sec_data} for more detail on the design of each measure). I show that the discussion effects are primarily mediated by a large shift in group-level norms of behavior towards transgender people (\autoref{tab_attitudes_beliefs_norms}), i.e., changes in predictions about what other group members are doing. This shift comes along with very pro-trans behavior during the discussion itself. Later, I show the effects of the rights videos (\autoref{tab_videos_norms}, \autoref{tab_videos_attitudes}).







%{\color{red}The reductions in discrimination driven by the group discussion and the transgender rights videos could be mediated by changes in people's attitudes towards transgender people, their perceptions about norms of behavior towards transgender people, or their beliefs about whether transgender workers are likely to successfully carry out a delivery. To evaluate the importance of these three channels, I evaluate the treatment effects on the supplementary mechanism outcomes elicited after the main hiring choices. I first examine the effects of the group discussion (\autoref{tab_attitudes_beliefs_norms}). Later, I show the effects of the rights videos (\autoref{tab_videos_attitudes}, \autoref{tab_videos_norms}).}

%\subsection{Effects of 3-person discussion}



%
%	\begin{table}[!p]
%\caption{Attitudes, beliefs about reliability, and beliefs about others}
%\label{tab_attitudes}
%\resizebox{\textwidth}{!}{
%\input{../../outputs/tables/attitudes.tex}
%}
%\begin{tablenotes}
%\scriptsize	\item \textit{Notes}: * p $<$ 0.1, ** p $<$ 0.05, *** p $<$ 0.01. Standard errors are clustered at the group-of-3 level and are in parentheses. Randomization inference p-values are in brackets. Unit of observation is the participant $\times$ choice level. Sample includes only the \textit{No discussion (private)} and \textit{3-person discussion} arms, in both phases.
%\item \textit{Column (1)}: dependent variable is the number of statements the participant agreed with on each of the lists of statements shown to them. Each participant sees both List A and List B, and the anti-trans statement ("In general, if I see a transgender person, I walk away") is randomly included in either List A or List B. \textit{Question FEs} is a fixed effect for List B.
%\item \textit{Column (2)}: the enumerator describes two scenarios, one in which an employer rejects a transgender individual, and another in which a woman avoids a transgender individual on the street. The dependent variable is whether the participant says the person's actions are "wrong". \textit{Question FEs} is a fixed effect for the second scenario.
%\item \textit{Column (3)}: the dependent variable is whether the participant says a worker is likely or very likely to complete a delivery after being shown a photo of the worker. Participants make two choices each, one of which includes a transgender photo. The order is randomized. \textit{Question FEs} controls for the order of the choice.
%%In columns (1) and (2), the outcome is whether the \textit{alternative worker} (rather than the male \textit{benchmark worker}) in the private choices in the \textit{outcome round}. In column (3), it is whether the transgender worker was selected. $\textit{Worker is trans} = 1$ when the alternative worker is transgender, and is 0 when the alternative worker is male or female. 
%%The mean of the dependent variable when the worker is trans and in the \textit{No discussion (private)} arm indicates that the transgender worker was selected (rather than the male benchmark worker) \input{../../outputs/stats/p_choose_trans_control.tex} of the time. The mean when the worker is male or female in the \textit{No discussion (private)} arm is above 50\% because participants on average prefer female alternative workers to the male benchmark workers.
%%The specification used is seen in equation \ref{eqn_main_spec}.  
%\item Controls include stratum fixed effects; dummies for the rights-video treatments; phase fixed effects; and the controls selected by double LASSO (see Section \ref{sec_lasso}). 
%As pre-specified, columns (1) and (2) are adjusted for multiple hypothesis testing by calculating the q-value that controls for the false discovery rate \citep{andersonMultipleInferenceGender2008}.
%
%%In column (2), controls are interacted with \textit{Worker is trans}, so the coefficient on \textit{Worker is trans} is not shown. Columns (2) and (3) also include controls for the relative \# items offered by the alternative worker, the relative reliability score of the worker, and a dummy for whether the reliability score was shown. randomization inference \textit{p}-values at the base of the table test for differences between treatment effects across treatment arms, i.e., for differences in the interacted terms in columns (1) and (2), and differences in the uninteracted terms in column (3).
%
%\end{tablenotes}
%\end{table}


%\paragraph{Norms.}
%
%To examine whether descriptive norms of behavior towards transgender workers are affected by the treatment, I asked participants to make predictions about the \textit{private} (outcome-round) hiring choices of others. 
%
% \textit{Beliefs about others' choices (group).} Participants then made incentivized predictions of the private hiring choices of the other two people \textit{in their group}. For each of the other two group members, they were asked to predict which option the other person chose for two pairs of delivery options.
%
%   The discussion caused a large increase in the predicted probability that a fellow group-member selects a transgender worker (\autoref{tab_attitudes_beliefs_norms}, panel B, column 2; \input{../../outputs/stats/effect_norms_group.tex} p.p., $p$\input{../../outputs/stats/p_val_group_norms.tex}). 
% 
%The larger magnitude of the update in predictions \textit{within} the group suggests that the effect of the discussion is likely to be mediated by a shift in group-level norms. %\footnote{However, because the elicitation methods vary across the measures, comparisons of the magnitudes of the treatment effects should be interpreted with caution.} 
%In line with the claim that effects are mediated by group norms, controlling for group norms significantly attenuates the effect on discrimination, while controlling for community-wide norms does not (\autoref{tab_mediation}).







% Column (1) of \autoref{tab_attitudes} shows that the discussion does not have a significant effect on the proportion of people who agree with the anti-trans statement.
 
% Column (2) of \autoref{tab_attitudes} shows that the discriminator's behavior was seen as wrong in 93\% of cases in the control group, and this did not change significantly in the discussion group.

% Column (3) of \autoref{tab_attitudes} shows that participants are 7 percentage points less likely to say that the transgender worker was likely or very likely to complete the delivery, but that this is not significantly affected by the discussion.

%
%\begin{table}[!p]
%\caption{Predictions about others}
%\label{tab_norms}
%\resizebox{\textwidth}{!}{
%\input{../../outputs/tables/norms.tex}
%}
%\begin{tablenotes}
%\item \footnotesize	\textit{Notes}: * p $<$ 0.1, ** p $<$ 0.05, *** p $<$ 0.01. Standard errors are clustered at the group-of-3 level and are in parentheses. Randomization inference p-values are in brackets. Sample includes all participants in the \textit{3-person discussion} arm and the \textit{No discussion (private)} arm, in both phases. 
%\item Column (1): The unit of observation is the participant. The dependent variable is the incentivized prediction of the proportion of other people (how many out of 20) in the study who pick a transgender person to receive a delivery when shown a specific pair of workers. Each participant makes 3 incentivized predictions, one of which includes a transgender worker. Only the choice involving the transgender worker is included for analysis.
%\item Column (2): The unit of observation is the participant $\times$ prediction. The dependent variable is whether the participant predicted that another person in their group selected a transgender worker in the private outcome round. The prediction is incentivized. Each participant made 2 predictions (one involving a transgender worker) for each of their 2 group members. The two predictions involving a transgender worker are included for analysis.
%\item Controls include stratum fixed effects; dummies for the rights-video treatments; phase fixed effects; and the controls selected by double LASSO (see Section \ref{sec_lasso}). 
%As pre-specified, columns (1) and (2) are adjusted for multiple hypothesis testing by calculating the q-value that controls for the false discovery rate \citep{andersonMultipleInferenceGender2008}.
%\end{tablenotes}
%\end{table}




	
	
% that does not get corrected during the discussion, 

%To examine whether the main results on hiring choices could be driven by misperceptions and changes in participants' beliefs about others in their group, \autoref{fig_group_predic} shows whether participants on average under- or over-estimate the proportion of people in their group who select transgender workers. When they have not taken part in a group discussion, participants \textit{underestimate} the proportion of others who select transgender workers, with a mean prediction of \input{../../outputs/stats/group_predic_control.tex}, when actually people select the transgender \input{../../outputs/stats/group_predic_actual_control.tex} of the time for the options that received a prediction ($p$-value of difference: \input{../../outputs/stats/pval_misper_control.tex}). By contrast, in the \textit{3-person discussion} arm, this misperception has been corrected and there is even possibly a slightly overestimate of the proportion of others who select transgender workers, with a point estimate on the average misperception of \input{../../outputs/stats/misper_diff_treat.tex} percentage points ($p$=\input{../../outputs/stats/pval_misper_treat.tex}).






\section{Mechanisms: how does the discussion affect post-discussion discrimination?}
\label{sec_mechanisms_post_discussion}

How did group discussions reduce discrimination? I address this question in two sections. In this section, I use mechanism treatments and outcomes to identify how within-discussion behavior affected post-discussion choices in the first place. I show experimental evidence in favor of inter-personal persuasion, and suggestive correlational evidence that participants persuaded each other primarily by influencing perceived norms around hiring transgender workers, rather than by changing attitudes or beliefs.
%
%
%
%%Section \ref{sec_mechanisms_asymmetry} then explains why discussions produced a large average reduction in discrimination by identifying three asymmetries: 
%
%
%%pro-transgender participants spoke up more frequently, participants misperceived the strength of anti-discriminatory norms, and moral arguments against discrimination carried particular rhetorical weight.
%
%
I then investigate why discussions created a large mean shift away from discrimination (Section~\ref{sec_mechanisms_asymmetry}). Since standard models of communication (e.g., based on rational Bayesian agents) typically predict no changes in average beliefs, explaining the substantial treatment effects requires identifying asymmetries in the communication process. I provide suggestive evidence of three such asymmetries: pro-transgender participants spoke up more frequently, participants incorrectly updated and misperceived the strength of anti-discriminatory norms, and pro-trans arguments based on morality appear to have been particularly persuasive.




%Standard models of communication would predict that pro- and anti-transgender preferences should roughly cancel out during discussions. To explain the substantial reduction in discrimination, I identify three asymmetric forces: pro-transgender participants spoke up more frequently, participants incorrectly inferred stronger anti-discriminatory norms, and moral arguments against discrimination held a rhetorical advantage.


% choices first, showing how the within-discussion behavior influences post-discussion discrimination (this section)? Here I will use mechanism treatments to show that participants persuade each other, and that they appear to do so by infleuncing perceived norms of behaviour towards transgender workers (rather than so much to do with attitudes or beliefs changing).

%Second, how is there a large mean-shift in the post-discussion behaviour (Section \ref{sec_mechanisms_asymmetry})? Standard models of communication (e.g., based on rational Bayesian updaters) would suggest that participants may simply aggregate their information in the discussion... To explain the discusssions large short-run treatment effects, therefore, requires some form of ``asymmetry'' that means that forces pushing agianst and for discrimination do not simply cancel out. \textcolor{red}{DO I NEED TO DEFINE ASYMMETRY} I explore suggestive evidence in favor of three forms of asymmetry: (i) that pro-trans people are more vocal in discussions, (ii) that participants perform incorrect inference that leads them to overestimate the strength of an anti-discriminatory norm within their group, and (iii) ``rhetorical asymmetry'', i.e., that pro-trans participants are more persuasive becuase they make extensive use of moral arguments.

Appendix \ref{sec_alt_mechanisms} also provides evidence against a number of alternative explanations of the results, including (i) other characteristics of the photos of workers, such as perceived caste; (ii) social image concerns that affect even private, post-discussion choices; (iii) increased contemplation about choices; (iv) experimenter demand effects or social desirability bias; (v) increased salience of the notion of transgender; (vi) cheap talk due to low stakes; and (vii) discussion facilitators influencing discussions to be pro-trans.


%Building on the evidence above, I explore three candidate mechanisms for explaining why the pro-trans behavior during the discussion reduced post-discussion discrimination:
%\textcolor{red}{Having documented that participants behave favorably towards transgender workers during the discussion, partly driven by pro-trans participants being more willing to speak up, I next explore three candidate mechanisms that could explain how the pro-trans behavior during the discussion could affect private post-discussion discrimination:}

\subsection{Candidate mechanisms for affecting post-discussion behavior}

First, I explore three candidate mechanisms that could explain how behavior during the discussion could affect private post-discussion discrimination:
\begin{enumerate}[(1)]
  \item \textit{Norm-based persuasion}. Participants could persuade each other to discriminate less by changing perceived social norms. After observing frequent pro-trans statements and choices, participants may come to view discrimination as less socially acceptable or less prevalent. This perceived norm could then influence their post-discussion choices if they are motivated to adhere to social norms even in private.%\footnote{I remain agnostic on whether the effects are primarily driven by an update in the \textit{prescriptive norm} (what people think others believe one should do) or \textit{descriptive norm} (what people think others will do). The changes in both are likely to be highly correlated and have similar behavioral predictions.}
  \item \textit{Attitude- or belief-based persuasion}.\footnote{I refer to both norm- and attitude-based influence as persuasion, since both mechanisms make very similar behavioral predictions: they both involve participants saying and choosing things that influence others to discriminate less \textit{in private} after the discussion ends. This follows previous literature that defines persuasion as a message sent to a receiver by a sender who has a potential interest in affecting the receiver's behavior (e.g., \citealp{dellavignaPersuasionEmpiricalEvidence2010}), and work that includes \textit{social proof} as a key mechanism for persuasion \citep{cialdiniInfluencePsychologyPersuasion2009}.} Hearing pro-trans statements and choices could affect participants' personal attitudes or beliefs about transgender workers. For example, a participant could become more willing to interact with a transgender worker after a group member tells an anecdote of a positive interaction with a transgender person.
  \item \textit{Self-persuasion}. A participant may change her own attitudes towards transgender workers during the discussion, e.g., because she selectively searches for arguments in favor of her original view, and fails to fully adjust for this after the fact \citep{benabouNarrativesImperativesMoral2020, schwardmannSelfPersuasionEvidenceField2022}.
%  Pro-trans statements and choices in the discussion could update people's beliefs about the norm of behavior towards less discrimination, i.e., they believe that discriminatory behavior is less socially acceptable after the discussion. If participants are then motivated to follow this norm even in private, this could explain the post-discussion behavior.
%
%  
%  could update shift beliefs about the prevailing norm of behavior towards transgender worke
%  
%  people's beliefs about the prevailing prescriptive norm. If participants are tehn motivated to follow this prescriptive norm \textit{in private}, this could explain the post-discussion behaviour.
%  \item \textit{Persuasion}. Hearing pro-trans statements and choices from others altered people's personal normative beliefs about whether one should hire a transgender worker or their empirical beliefs (e.g., about reliabiilty).
%  \item \textit{Self-persuasion}. Participants making pro-trans statements and choices altered their \textit{own} personal normative beliefs (\textcolor{red}{CITE})
\end{enumerate}




%While some researchers classify both mechanisms as persuasion (since they involve a sender potentially influencing a receiver's behavior, see, e.g., \citealp{dellavignaPersuasionEmpiricalEvidence2010}), I distinguish between them to clarify whether effects are mostly driven by changes in perceived norms, attitudes, or beliefs.

%\textcolor{red}{NEED TO SAY AT SOME POINT THAT IT DOESN'T SEEM TO BE ABOUT ALTERING BELIEFS...}

%\textcolor{red}{NEED TO ADD SOMETHING ON PRESCRIPTIVE NORM VS DESCRIPTIVE NORM..., maybe just a footnote}

%\textcolor{red}{EXPLAIN MY BEST GUESS AT THE CHANNEL AT THE TOP??}

%\textbf{Summary}: I first document evidence in favour of channels (i) and (ii) and against (iii), since in the listener arm even silently listening to others discussing strongly reduces post-discussion discrimination, and because hearing more pro-trans discussions from others is strongly correlated with reductions in post-discussion discrimination. 

%I then use the \textit{No discussion (public)} arm to provide evidence specifically on the norm-updating channel. Just observing others' public choices can reduce discrimination, suggesting that a pure norm-update can affect discrimination. For the norm-updating, there is evidence that the discussion \textit{generates} a misperceived norm. If when pro-trans participants speak up, other participants do not sufficiently adjust for the fact that other participants are silent (not interpreting this as evidence of anti-trans views), and so they falsely come to believe that the prevailing prescriptive norm is more progressive than it really is (\textcolor{red}{SPIRAL OF SILENCE REF}). \textcolor{red}{NOTE - would have to be motivated to follow the norm even in private...}

%I then use the \textit{No discussion (public)} arm to provide evidence specifically on the norm-updating channel. Just observing others' public choices can reduce discrimination, suggesting that a pure norm-update can affect discrimination. In addition, the discussion majorly shifts people's predictions about their group members' choices shifts majorly with the discussion, and this shift appears to explain much of the variaiton in post-discussion discrimination. \textbf{SPIRAL OF SILENCE}. \textcolor{red}{For the norm-updating, there is evidence that the discussion \textit{generates} a misperceived norm. If when pro-trans participants speak up, other participants do not sufficiently adjust for the fact that other participants are silent (not interpreting this as evidence of anti-trans views), and so they falsely come to believe that the prevailing prescriptive norm is more progressive than it really is (\textcolor{red}{SPIRAL OF SILENCE REF}). \textcolor{red}{NOTE - would have to be motivated to follow the norm even in private...}
%}

%I then document evidence on persuasion. It is Empirically challenging to distinguish between norm-updating and persuasion, (if both can affect private behaviour), \textbf{because they have very similar behavioural predcitions (explain this more)}. Nevertheless, the available evidence suggests that the effects are mediated by changes in the perceptiopn of the norm of behaviour towards trans workers, rather than their personal attitudes or beliefs; because there are minimal changes in measures of personal attitudes, these do not mediate the effects on post-discussion discrimination. In addition, participants with more persuasive group members do not discriminate less... 


%\textcolor{red}{Change this para so it basically explains my best guess at the mechanism... (not the evidence for/against)}

%\textcolor{red}{need to update }




%\textcolor{red}{NOTE: norm-updating and persuasion are very similar behaviourally (both involve participants saying things that affect private behaviour subsequently. I call this ``influence'' in the previous section... use ``influence'' language???}

%\textcolor{red}{Footnote: relationship of terms compared to DellaVigna/Gentzkow.. they would call both norm-updating and persuasion as persuasion... ``We define a persuasive communication to be a message provided by one agent (a sender) with at least a potential interest in changing the behavior of another agent (a receiver).''}

Below, I show experimental evidence in favor of inter-personal persuasion (consistent with (1) or (2)). I also present correlational evidence that favors norm-based persuasion, while indicating that attitude- or belief-based persuasion is unlikely to be a key driver.







%\textcolor{red}{The spiral of silence theory focuses on social image concerns; I show main driver is influence concerns}



% in line with theories of the ``spiral of silence'' 




%In tandem with the evidence on how pro-trans people are disproportionately likely to speak up, it also suggests a specific type of norm-updating based on the \textit{generation} of a misperception. If participants do not account for pro-trans people's disproportionate willingness to speak up, then observing the discussion make them think that their group are more pro-trans than they really are, i.e. generate a large update in perceived social norm, and reduce post-discussion discrimination.
% In line with this, I find evidence consistent with \textit{silence neglect} accentuanting the effects of norm-updating, in which participants only update towards a pro-trans norm when people speak up in favor of transgender workers, but fail to account for silence from other participants that may indicate anti-trans attitudes. (\textcolor{red}{refs}).\footnote{\textcolor{red}{This interpretation aligns with a misperceived social norm theory à la Bursztyn, Gonzalez, and Yanagizawa-Drott (2020), but, in this case, the norm would be misperceived as a result of the conversation itself.}}
 
% \textcolor{red}{More explicit link to spiral of silence theory here.... with references}





%When participants discuss transgender rights in their groups, those with negative views tend to remain silent. Consequently, the conversation is dominated by individuals with more positive views. If participants don't realize that this self-censorship is occurring (as discussed in Ho and Huang, 2024), they may incorrectly update their beliefs about how their group members think transgender individuals ought to be treated. In other words, they might come to believe that the prevailing prescriptive social norm favors positive treatment of transgender people. If participants are motivated to conform to this prescriptive norm even in private, they could engage in positive discrimination even in their private choices. This interpretation aligns with a misperceived social norm theory à la Bursztyn, Gonzalez, and Yanagizawa-Drott (2020), but, in this case, the norm would be misperceived as a result of the conversation itself.





%\textcolor{red}{The evidence strongly favors norm-updating, while indicating that persuasion and self-persuasion are unlikely to be key drivers. First, silent listeners in the \textit{2-person discussion} arm show substantial reductions in discrimination, ruling out self-persuasion since these listeners never actively participated. Second, participants exposed to more pro-trans advocacy during discussions show larger reductions in discrimination, consistent with both norm-updating and persuasion channels. Third, observers in the \textit{No discussion (public)} treatment, who simply correct their beliefs about the prevalence of pro-trans choices among others in their group also reduce post-discussion discrimination, indicating that a ``pure'' norm-update is sufficient for reducing discrimination. Finally, the discussion's treatment effects appear to be much more strongly mediated by changes in beliefs about other group members' actions (a measure of social norms) than of own personal attiudes; suggesting that norm-updating is likely to be more important than persuasion. I also find evidence consistent with \textit{silence neglect} accentuanting the effects of norm-updating, in which participants only update towards a pro-trans norm when people speak up in favor of transgender workers, but fail to account for silence from other participants that may indicate anti-trans attitudes. (\textcolor{red}{refs})}





%I find evidence that norm-updating is strongly supported by the data, since people's predictions about their group members' choices shifts majorly with the dsicussion, this shift appears to explain much of the variaiton in post-discussion discrimination, and simply observing others' choices in the \textit{No discussion (public)} arm is sufficient to reduce discrimination as well. It is Empirically challenging to distinguish between norm-updating and persuasion, (if both can affect private behaviour), \textbf{because they have very similar behavioural predcitions}. 





%\textcolor{red}{I cannot rule out (iii)? but effects are likely to be small} Also talk about heterogeneity.


%\textcolor{red}{OVERALL SUMMARY}


%\textcolor{red}{Norm-updating is strongly supported by the data....}

%\textcolor{red}{Empirically challenging to distinguish between norm-updating and persuasion, if both can affect private behaviour. There is evidence that the content of the discussion matters, which may be evidence of persuasion that changes people's personal normative beliefs but could also be signal prescriptive norm.... And also some suggestive evidence against persuasion (attitudes don't change much, persuasiveness index)}





%\textcolor{red}{For both channels, the disproportionate willignness of pro-trans participants to speak up is important for driving an average shift towards less discrimination. }

%I then use the \textit{No discussion (public)} arm to provide evidence specifically on the norm-updating channel. Just observing others' public choices can reduce discrimination, suggesting that a pure norm-update can affect discrimination. For the norm-updating, there is evidence that the discussion \textit{generates} a misperceived norm. If when pro-trans participants speak up, other participants do not sufficiently adjust for the fact that other participants are silent (not interpreting this as evidence of anti-trans views), and so they falsely come to believe that the prevailing prescriptive norm is more progressive than it really is (\textcolor{red}{SPIRAL OF SILENCE REF}). \textcolor{red}{NOTE - would have to be motivated to follow the norm even in private...}



%I then document suggestive evidence on persuasion; there is evidence in favor of persuasion based on the narratives that individuals hear in the discussion, because \textcolor{red}{some features of the content of the discussion (e.g., how much people discuss morality)} are highly correlated with whether people discriminate less after the discussion.

%\textcolor{red}{NEED TO DESCRIBE THE MAPPING BETWEEN norms/persuasion and statements/choices}

%Note that the behavioural predictions of both channels are very similar.








%\textcolor{red}{Signposting}
%
%\subsection{Framework: norm-updating and persuasion}
%
%\textcolor{red}{Need to discuss listener treatment TOP???}
%
%Below I document evidence that two similar mechanisms could be going on simultaneously. In the discussion, pro-trans participants are more likely to speak first and dominate the conversation. When they make pro-trans statements, this could simultaneously lead to:
%\begin{enumerate}
%  \item \textit{Norm-updating}. Pro-trans statements and choices could people's beliefs about the prevailing prescriptive norm. 
%  \item \textit{Persuasion}. Pro-trans statements and choices could alter people's personal normative beleifs about whether one should hire a transgender worker.
%\end{enumerate}
%For both channels, the disproportionate willignness of pro-trans participants to speak up is important for driving an average shift towards less discrimination. 
%
%For the norm-updating, there is evidence that the discussion \textit{generates} a misperceived norm. If when pro-trans participants speak up, other participants do not sufficiently adjust for the fact that other participants are silent (not interpreting this as evidence of anti-trans views), and so they falsely come to believe that the prevailing prescriptive norm is more progressive than it really is (\textcolor{red}{SPIRAL OF SILENCE REF}).
%
%At the same time, there is evidence in favor of persuasion based on the narratives that individuals hear in the discussion, because \textcolor{red}{some features of the content of the discussion (e.g., how much people discuss morality)} are highly correlated with whether people discriminate less after the discussion.
%
%Note that the behavioural predictions of both channels are very similar.
%



\subsection{Evidence consistent with persuasion}

I first present evidence that is compatible with both forms of inter-personal persuasion (norm- and attitude-based), but is not supportive of self-persuasion. Additional suggestive evidence is presented in Appendix \ref{sec_appendix_persuasion}.

%\subsection{Persuasion}

%A third channel that could explain the effects of the discussion is that (i) people persuade each other to change their behavior with the narratives and justifications they share during the discussion, and that (ii) persuasive communication is predominantly in favor of transgender workers. 

\textbf{Effect of listening to discussion}. Silent listeners in the \textit{2-person discussion} treatment reduce discrimination as much as active discussants, suggesting that the effects are driven by what participants hear from others in the discussion (inter-personal persuasion), rather than by self-persuasion. Despite not speaking, listeners discriminate \input{../../outputs/stats/effect_listener.tex} p.p. less after the discussion ($p$$<$0.001, \autoref{tab_mech_r1_r2}), an effect statistically indistinguishable from the effects for both 2-person speakers ($p$=\input{../../outputs/stats/p_val_listener_speaker.tex}) and 3-person discussion participants ($p$=\input{../../outputs/stats/p_val_listener_full_discussion.tex}). The effects persist in the 2--9 week follow-up ($\beta$=\input{../../outputs/stats/coeff_listener_fu.tex} p.p., $p$\input{../../outputs/stats/p_val_listener_fu.tex}, \autoref{tab_mechs_follow_up}), and are similar for a short-run outcome that is completely private, i.e., unobserved by neighbors and enumerators ($\beta$=\input{../../outputs/stats/anon_effect_listener.tex} p.p., $p$=\input{../../outputs/stats/anon_effect_listener_p.tex}, \autoref{tab_anon}, see Appendix \ref{sec_alt_mechanisms}). These results suggest that the effects are maintained even when post-discussion behavior is private, consistent with persuasion mechanisms.



%and appear even in completely private choices, indicating that listeners internalize rather than merely conform to the views they hear.


%To test whether participants behavior is affected by what they heard in the discussion (channels (i) and (ii)), rather than through taking part in the discussion (channel (iii)), I examine the effects on the \textit{listener} in the \textit{2-person discussion arm}, who silently listened to two other participants taking part in a discussion. 

%Listening led to large and significant reductions in subsequent private discrimination (\input{../../outputs/stats/effect_listener.tex} p.p., $p$$<$0.001, \autoref{tab_mech_r1_r2}). This effect is not significantly different from the effect of speaking in either the 2-person discussion ($p$=\input{../../outputs/stats/p_val_listener_speaker.tex}) or the 3-person discussion ($p$=\input{../../outputs/stats/p_val_listener_full_discussion.tex}). Since the listener was silent, they changed their private behavior solely based on being persuaded by the choices and justifications they heard from others in the discussion. The effect of the discussion therefore does not operate through self-persuasion or self-consistency channels, where active participation in the discussion is crucial for generating reductions in discrimination \citep{falkConsistencySignalSkills2017, schwardmannSelfPersuasionEvidenceField2022}. 

%Moreover, the effects on listeners are persistent and apply when they are behaving in total privacy, providing evidence in favor of persuasion \textcolor{red}{or of behaviour where people want to follow a prescriptive norm even in private}. In the individual follow-up (when group members are very unlikely to be present) they are still \input{../../outputs/stats/coeff_listener_fu.tex} p.p. more likely to select a transgender worker  ($p$\input{../../outputs/stats/p_val_listener_fu.tex}, \autoref{tab_mechs_follow_up}). The discussion also reduces listeners' discrimination even on a short-run outcome that is completely private (i.e., unobservable by neighbors) (see Section \ref{sec_alt_mechanisms} and \autoref{tab_anon}).

\textbf{Heterogeneity by pro-trans discussion}. Participants who hear more pro-transgender statements from others discriminate less afterward: each additional instance in which a participant hears a group member speak positively about a transgender option (out of a maximum of 4, according to enumerator observations) is associated with a \input{../../outputs/stats/het_by_posneg_discussion_full_coeff.tex} p.p. reduction in discrimination after the discussion ($p$$<$0.001, \autoref{het_by_posneg_discussion}).\footnote{The coefficients on pro-trans discussions are not significantly different when also controlling for the probability of hearing anti-trans discussions.} This pattern also holds for silent listeners (\input{../../outputs/stats/het_by_posneg_discussion_listener_coeff.tex} p.p., $p$$<$0.001, right panel), whose results are more likely to reflect causal effects since they have minimal opportunity to endogenously affect their group members' behavior. These correlational results are consistent with either norm-based persuasion (since more pro-trans discussions more strongly signal a pro-trans norm) or attitude- or belief-based persuasion (since more pro-trans arguments are more likely to change attitudes or beliefs).


%If other group members speak positively about transgender workers during the 3-person discussion, participants are significantly more likely to select transgender workers in their subsequent private choices (\autoref{het_by_posneg_discussion}, $p$$<$0.001). This pattern also holds for silent listeners (\textcolor{red}{detail}), whose results are more likely to reflect causal effects since they have less opportunity to endogenously affect their group members' behavior. 

%\textcolor{red}{ADD FOOTNOTE ABOUT NEGATIVE DISCUSSION??}








%\textcolor{red}{This again is evidence against the third channel (self-persuasion) and supports the norm-updating or persuasion channels.}




%\textbf{Heterogeneity by how positive the discussion is}. Also strongly in favour of channels (i) and (ii) is the evidence of heterogeneity of post-discussion discrimination with regards to how pro-trans their discussion was. How much an individual is likely to select a trans worker in their private post-discussion choices is strongly increasing in the probability that the other group members speak positively about transgender workers in the discussion, as per enumerator responses to the question ``who spoke positively about a transgender worker?'' (\autoref{het_by_posneg_discussion}, $p$<0.001). This is true for the 3-person discussion arm and also find a Similar pattern for listener, where there is less concern of participants endogeneously affecting other group members' behaviour (although listener is noisier due to smaller sample size)... This again is evidence against the third channel (self-persuasion) and supports the norm-updating or persuasion channels.

%We also observe that when there is \textit{no} pro-trans discussion from other group members (P(Other group members spoke pro-trans) = 0), participants tend to behave approximately simiarly to the control group. \textcolor{red}{FOOTNOTE on NEGATIVE DISCUSSIONS} This tells us something important about the effect of the discussion: in the norm-updating world, it could imply that there is \textit{silence neglect}, where people wrongly ignore the fact that people staying silent may actually indicate that they are discriminatory, and incorrectly interpret that as a lack of signal. Alternatively, if the results are driven by persuasion, it could be explained by an attitudinal (preference-based) model where one needs to receive \textit{new arguments or narratives} in order to change ones' normative beliefs: silence is not enough to persuade, even if that silence has informational content.


%\textcolor{red}{Link to rhetorical asymmetry??}





	\begin{figure}[!htb]
		
		\centering
		\caption{More pro-trans discussions have more positive effects on post-discussion discrimination}
			\includegraphics[width=\linewidth]{../../outputs/figs/het_by_posneg_discussion}
		\label{het_by_posneg_discussion}
		\begin{tablenotes}
			\item \vspace{-1.5em} 	\footnotesize \textit{Notes}: \textit{P(Other group members spoke trans)} (x-axis) is the proportion of (discussion choice $\times$ participants) that spoke positively about a transgender option. There were two other group members, each with two opportunities to speak positively about a transgender worker. \textit{P(Chose trans in outcome round)} (y-axis) is the probability a participant chose the transgender worker in the private outcome round. Plotted line of best fit and 95\% confidence intervals are based on raw data with no controls. Left panel uses 3-person discussion participants in phases 1 and 2; right panel uses listeners in phase 2. Slope and p-values come from models with LASSO controls and all other controls in \autoref{tab_main}. $\beta$=0.28 implies that each additional positive discussion from a group member corresponds to being $28/4=$\input{../../outputs/stats/het_by_posneg_discussion_full_coeff.tex} p.p. more likely to select a transgender worker in the outcome round. \textit{$H_0 ($intercept=control$)$} tests whether a discussion participant for whom \textit{P(Other group members spoke trans) = 0} exhibits different post-discussion discrimination from the control group.
			
			
			
%			Coefficient in black is using model with LASSO, all other controls in Table 1; Plotted slope is regression with no controls, standard errors clustered at group-of-3 level. \textcolor{red}{What is P(Other group members spoke pro-trans)}. There are two other group members, each with two opportunities to speak positively about a transgender worker, so $\beta$=0.28 implies that each additional positive discussion corresponds to being $28/4=$\input{../../outputs/stats/het_by_posneg_discussion_full_coeff.tex} p.p. more likely to select a transgender worker in the outcome round. \textcolor{red}{CHECK WHAT THE SAMPLE IS HERE -- phase 1, 2 or both??? Need to be consistent...}
			%Sample includes all participants in the \textit{3-person discussion} arm and the \textit{No discussion (private)} arm, in both phases.  Unit of observation is participant $\times$ prediction. Only choices that include a transgender photo are included. Hollow bars represent the probability that a participant predicts that their group-member selects a transgender delivery worker. The prediction was incentivized. Each participant made 2 predictions (one involving a transgender worker) for each of their 2 group members. The two predictions involving a transgender worker are included for analysis. Filled bars represent the actual probability that participants select a transgender worker in the outcome round (restricting to only choices for which another group member made a prediction).
		\end{tablenotes}
	\end{figure}
	
	
	


%\textcolor{red}{POSSIBLY CUT?? or move, or footnote}






\begin{table}[!htbp]
	\caption{Effect of mechanism treatments (listener, public, observer)} %\textcolor{red}{PROBABLY REMOVE THIS / APPENDIX}}
	\label{tab_mech_r1_r2}
	\centering
	\resizebox{0.85\textwidth}{!}{
		\input{../../outputs/tables/mechs_combine_r1_r2.tex}
	}
	\begin{tablenotes}
%		\item \footnotesize	\textit{Notes}: * p $<$ 0.1, ** p $<$ 0.05, *** p $<$ 0.01. 
%		\item Controls include items and quality stuff
%		\item 
		
		\item	\scriptsize \textit{Notes}: 
		* p $<$ 0.1, ** p $<$ 0.05, *** p $<$ 0.01. Standard errors are clustered at the group-of-3 level and are in parentheses.
Randomization inference p-values are in brackets. 
Sample includes all treatment arms in phase 2 of data collection. In column (1), the \textit{Listeners} are excluded (since they only make choices after they have observed the discussion).
The specification is otherwise the same as in column (3) of Tables \ref{tab_main} and \ref{tab_videos}.
\textit{No discussion (public), pooled} includes both \textit{Observers} and \textit{Non-observers}, since both these types are treated the same until after the treatment round.
	\end{tablenotes}
\end{table}




\subsection{Evidence for norm-based persuasion}

%The evidence suggesting silence neglect, in combination with the evidnece that the conversation is dominated by individuals with more positive views, suggests that participants may be incorrectly updating their beliefs towards thinking that their group are highly pro-trans. 




%\textbf{Norm updating channel } - When participants discuss transgender rights in their groups, those with negative views tend to remain silent. Consequently, the conversation is dominated by individuals with more positive views. If participants don't realize that this self-censorship is occurring (as discussed in Ho and Huang, 2024), they may incorrectly update their beliefs about how their group members think transgender individuals ought to be treated. In other words, they might come to believe that the prevailing prescriptive social norm favors positive treatment of transgender people. If participants are motivated to conform to this prescriptive norm even in private, they could engage in positive discrimination even in their private choices. This interpretation aligns with a misperceived social norm theory à la Bursztyn, Gonzalez, and Yanagizawa-Drott (2020), but, in this case, the norm would be misperceived as a result of the conversation itself.

Having shown that the results are driven by participants influencing \textit{each other's} post-discussion behavior, I now document descriptive evidence in favor of norm-updating as the primary channel.


%\textcolor{red}{Here I document pieces of evidence in favour of norm-updating:}

\textbf{Effect of discussion on within-group norms}. Discussions had large effects on participants' beliefs about their peers' behavior, providing strong evidence for norm-updating. In an incentivized prediction task, participants predicted whether their two group members selected transgender workers in private post-discussion choices (see Appendix \ref{sec_data}). The discussion increased these predicted probabilities by \input{../../outputs/stats/effect_norms_group.tex} p.p. (\autoref{tab_attitudes_beliefs_norms}, panel B, column 2, $p$\input{../../outputs/stats/p_val_group_norms.tex}). This change in perceived within-group norms appears to be a key mediator of the main treatment effects: controlling for within-group norms significantly weakens the effect on private discrimination (\autoref{tab_mediation}), while controlling for other mechanism outcomes (community-wide norms, disapproval of discrimination, beliefs about reliability) does not. Section \ref{sec_mechanisms_asymmetry} will discuss explanations for why norms shift so starkly away from discrimination.







%I find more direct evidence of \textit{silence neglect} that generates asymmetry in how people update their perceptions of the norm. When no other group members make pro-trans statements, participants predict that their group members will behave similarly to the control group, suggesting that silence does not substantially update them towards expecting more discrimination from their peers ($p$ of difference: \input{../../outputs/stats/het_by_posneg_discussion_full_predic_intercept.tex}, \autoref{het_by_posneg_discussion_predictions}). This suggests that participants neglect the informational content of silence (``silence neglect''), incorrectly ignoring that silence likely indicates that participants are anti-trans rather than just indifferent. In turn, participants who hear no pro-trans statements do not discriminate more in their private choices, suggesting that this silence neglect translates to no changes in discrimination (\autoref{het_by_posneg_discussion}).\footnote{
%For listeners, there is weak evidence that silent discussions increase discrimination ($\beta$=\input{../../outputs/stats/silent_discussion_non_linear_listener_coeff.tex} p.p., $p$=\input{../../outputs/stats/silent_discussion_non_linear_listener_p.tex}). I place less weight on this finding given the small sample of listeners with silent discussions ($N$=25) and that the effect is no longer significant under a specification linear in $P($\textit{Other group members spoke pro-trans}$)$ ($p$=\input{../../outputs/stats/silent_discussion_listener_p.tex}). However, it could indicate \textit{partial} rather than total silence neglect.
%} 





%  do not update their predictions towards predicting more discrimination






%\textbf{ASYMMETRIC UPDATING}
%
%
%When no group members make pro-trans statements, participants behave similarly to the control group ($p$=\input{../../outputs/stats/silent_discussion_p.tex} for 3-person discussion), suggesting that silence neither reduces nor increases discrimination.\footnote{
%For listeners, there is weak evidence that silent discussions increase discrimination ($\beta$=\input{../../outputs/stats/silent_discussion_non_linear_listener_coeff.tex} p.p., $p$=\input{../../outputs/stats/silent_discussion_non_linear_listener_p.tex}). I place less weight on this finding given the small sample of listeners with silent discussions ($N$=25) and that the effect is no longer significant under a specification linear in $P($\textit{Other group members spoke pro-trans}$)$ ($p$=\input{../../outputs/stats/silent_discussion_listener_p.tex}). However, it could indicate \textit{partial} rather than total silence neglect.
%%This may indicate partial rather than total silence neglect, although I place less weight on this evidence given that the sample size for listeners is substantially smaller (only 25 listeners had $P(Other group members spoke pro-trans) = 0$) and the negative effect relative to control does not persist when assuming a linear relationship ($p$=\input{../../outputs/stats/silent_discussion_listener_p.tex}).
%} This suggests that participants neglect the informational content of silence (``silence neglect''), incorrectly ignoring that silence likely indicates that participants are anti-trans rather than just indifferent. %\textcolor{red}{Link to misperception // spiral of silence // }
%
%\textcolor{red}{SILENCE NEGLECT CLAIM SHOULD BE MADE BASED ON SOBs NOT ON ENDLINE DISCRIM -- this maybe therefore needs to go in the norm--udpating section?? \autoref{het_by_posneg_discussion_predictions}}








%\textcolor{red}{Add note on: The "2-person discussion (predictions about speakers)" panel in ﬁgure Figure A26 does seem to hint to a misperceived social norm.}

%\textcolor{red}{[FIG: updating norms; with public as well...}




%overestimation of discrimination among their peers . In line with this, without any discussion particip

%The discussion could have led to a positive update on perceived social norms if it corrected a misperception in which people overestimated how discriminatory their group members are and this misperception is corrected in the course of the discussion. In line with this, the control group underestimates how often their group members choose a transgender worker by \input{../../outputs/stats/misper_diff_control.tex}p.p. ($p$\input{../../outputs/stats/pval_misper_control.tex}).

%However, the discussion does not seem to simply \textit{correct} a misperception, but if anything to \textit{generate} a misperception in favor of transgender workers, in line with \textcolor{red}{SPIRAL OF SIELNCE}: discussion participants overestimate the probability of selecting a transgender worker by 2.9 p.p. ($p$=0.10) after discussions.  This helps understand how the discussion could generate a larger effect on discrimination than a correction of the misperceived norm would imply. The creation of a misperception is also consistent with the evidence that pro-trans participants speak up more, and the evidence of silence neglect.


%One might think that the effect of the discussion is driven by participants initially thinking that others' are discriminatory (in line with the correction of a misperceived social norm in \textbf{Bursstyn et al}), but correcting that misperceived social norm during the dsicussion and allowing more pro-trans choices afgerwards. In fact, the discussion seems to not only \textit{correct} a misperceived underestimation of others' willingness to hire transgender workers, but to get participants to overshoot and to start partially overestimating instead.  While the control group underestimates how often their group members choose a transgender worker by \input{../../outputs/stats/misper_diff_control.tex}p.p. ($p$\input{../../outputs/stats/pval_misper_control.tex}), discussion participants overestimate by 2.9 p.p. ($p$=0.10) after discussions. This creation of a misperception is consistent with the evidence that pro-trans participants speak up more, and the evidence of silence neglect.



%Moreover, this appears to be key mediator of the main treatment effects


%The first piece of evidence in favor of the norm-updating channel is that the discussion leads to large updates in within-group norms. \autoref{fig_group_predic}  examines this by displaying participants'  \textit{within-group} predictions about others' private choices, and comparing them to the true probability of selecting a transgender worker. Participants made incentivized predictions of the private hiring choices of the other two people in their group. For each of the other two group members, they were asked to predict which option the other person chose for two pairs of delivery options (see Appendix \ref{sec_data} for more detail).

%The discussion substantially increases participants' predictions that others in their group will \textit{privately} select transgender workers after the discussion by \input{../../outputs/stats/effect_norms_group.tex} p.p. (\autoref{tab_attitudes_beliefs_norms}, panel B, column 2, $p$\input{../../outputs/stats/p_val_group_norms.tex}). Moreover, this appears to be key mediator of the main treatment effects: controlling for within-group norms significantly weakens the effect on discrimination (\autoref{tab_mediation}), while controlling for other mechanism outcomes (community-wide norms, disapproval of discrimination, beliefs about reliability) does not. \textcolor{red}{The fact that it doesn't attenuate to 0 does imply that even holding group norms constant, there is still some effect on private behaviour (indicating some private change in normative beliefs independent of perceived norms).}

%The results also suggest that the norm updating from the discussion is about more than just the correction of a misperception, but are more about the creation of a misperception that possibly then ratchets over the course of a discussion.
%We observe that in the absence of a discussion, there is a significant misperception in which control participants underestimate the probability that their group members select a transgender worker by \input{../../outputs/stats/misper_diff_control.tex} p.p. ($p$\input{../../outputs/stats/pval_misper_control.tex}), suggesting an initial overestimation of discrimination. But after a discussion, if anything participants slightly overesitmate how likely others are to select a trans worker (by 2.9 p.p., $p$$=$0.10)


%\textcolor{red}{Fits with spiral of silence}

%\textcolor{red}{Still need to cite Leo Saudi Arabia paper}
%\footnote{
% This idea is motivated by evidence in other contexts showing that correcting misperceptions about discriminatory norms can reduce anti-minority behavior \citep{bursztynMisperceivedSocialNorms2020}. 
%Alternatively, if participants initially \textit{underestimated} how discriminatory their peers were, and this misperception was not corrected in the discussion, they may have faced social pressure to discriminate less in the group discussion. \autoref{fig_group_predic} shows that this does not fit the data.}

%This tells us something about the nature of norm updating from the discussion - in particular, that the results are \textit{not} simply the result of correcting a misperception about the norm, but more likely are due to the \textit{creation} of a misperception (\textcolor{red}{and dynamics}
%\footnote{
%Participants also made incentivized predictions about the choices of others in the study whom they did not know. The predicted probability of selecting a transgender worker increased by a modest \input{../../outputs/stats/effect_norms.tex} p.p. (\input{../../outputs/stats/effect_norms_perc.tex}) in the discussion arm (\autoref{tab_attitudes_beliefs_norms}, panel B, column 1). However, these do not appear to mediate the effects on discrimination: controlling for within-group norms significantly attenuates the effect on discrimination, while controlling for community-wide norms does not (\autoref{tab_mediation}).
%}







%In line with the proposed channel, control participants underestimate the probability that their group members select a transgender worker by \input{../../outputs/stats/misper_diff_control.tex} p.p. ($p$\input{../../outputs/stats/pval_misper_control.tex}), suggesting an initial overestimation of discrimination. However, a corrected misperception is not sufficient to explain the discussion's effects. 
%While the discussion does stop participants overestimating discrimination, it also generates a large level-shift of roughly 20 p.p. in \textit{both} the predictions and actual choices: people discriminate significantly less than would be the case if the control group's misperceptions were simply corrected.\footnote{%
%A second piece of evidence against the misperception channel is based on  \textit{No discussion (public)} participants, who were told the \textit{public} choices of others in their group before making predictions about \textit{private} choices. They also had their misperceptions corrected (\autoref{fig_group_predic_phase2}, estimate of misperception: $\input{../../outputs/stats/misper_public.tex}$ p.p., $p$=\input{../../outputs/stats/misper_public_pval.tex}), but the effect on discrimination in this arm was much smaller than the effect of the discussion (\autoref{tab_mech_r1_r2}).
%}  The control-group misperception was \input{../../outputs/stats/misper_diff_control.tex} p.p., and the total change in beliefs was \input{../../outputs/stats/effect_norms_group.tex} p.p. (\autoref{tab_attitudes_beliefs_norms}, panel B, column 2). Thus, even under the very generous assumption that the post-discussion effect on discrimination was \textit{solely} driven by changes in second-order beliefs, correcting the misperception would only account for \input{../../outputs/stats/misperc_accounting.tex} (bootstrap 95\% CI: [\input{../../outputs/stats/misper_accounting_lower.tex}, \input{../../outputs/stats/misper_accounting_upper.tex}]) of the discussion's treatment effect.\footnote{This back-of-the-envelope calculation also assumes that second-order beliefs translate \textit{linearly} to post-discussion discrimination, and that beliefs correct precisely and do not overshoot.} Thus, although correcting a misperceived norm might contribute to the discussion's impact, it is unlikely to account for the whole effect.




%Evidence that simply observing others' choices leads to an update in perceived within-group norms, and that this then leads to reductions in discrimination:


\textbf{Effect of observing others' choices}. To isolate whether norm-updating alone is sufficient to reduce discrimination, I examine the observers in the \textit{No discussion (public)} treatment. Simply observing others' public choices (without a discussion) increases predictions that group members will select transgender workers in private by \input{../../outputs/stats/effect_of_announce_sob_coeff.tex} p.p. ($p$=\input{../../outputs/stats/effect_of_announce_sob_p.tex}, \autoref{tab_effect_of_announce_sob_discrim}, column 1), correcting participants' overestimation of discrimination ($p$-value for the test that the misperception equals zero: \input{../../outputs/stats/misper_public_pval.tex}).\footnote{There are no average effects of making choices public \textit{in the treatment round} ($\beta$=\input{../../outputs/stats/effect_r1_public.tex} p.p., $p$=\input{../../outputs/stats/p_val_r1_public.tex}), so observing public choices in the \textit{No discussion (public)} arm is on average equivalent to observing the control group's choices.} This corrected norm translates one-for-one into reduced discrimination in the private round ($\beta$=\input{../../outputs/stats/effect_of_announce_discrim_private_coeff.tex} p.p., $p$=\input{../../outputs/stats/effect_of_announce_discrim_private_p.tex}, column 3).\footnote{Using the cross-sectional heterogeneity in whether participants observed pro- or anti-trans choices further shows that observing one additional group member's choice to select a transgender worker (out of a maximum of 4) increases the probability of selecting a transgender worker in the private outcome round by \input{../../outputs/stats/effect_of_announce_discrim_het_coeff.tex} p.p. ($p$\input{../../outputs/stats/effect_of_announce_discrim_het_p.tex}, column 4).} In other words, simply observing others' choices without a discussion leads to small updates in perceived within-group norms, and this is sufficient to reduce later private discrimination.
While this demonstrates that norm-updating can reduce discrimination, the discussions' effects are larger. This plausibly reflects the sources of asymmetry documented in Section \ref{sec_mechanisms_asymmetry}: for example, pro-trans participants' greater willingness to speak up likely leads to larger shifts towards an anti-discriminatory norm.


%pro-trans participants' asymmetric willingness to speak up may shift perceived norms more strongly away from discrimination.


%\textcolor{red}{NEED TO EXXPLAIN THAT THERE IS A MISPERCEPTION SOMEWHERE???}


%. \textcolor{red}{Instead, we require a ratcheting effect à la the Spiral of Silence story in a repeated game; or misperceptions driven by a differential willingness to speak up (see above); or some effects driven by the narratives and actual content of individual's communication  to explain the results.. (which I turn to next)} 




%The magnitudes here are smaller than the magnitudes of the discussion, which is again a suggestion that a \textit{pure} misperception correction is not enough to explain the results. Instead, we require a ratcheting effect à la the Spiral of Silence story in a repeated game; or misperceptions driven by a differential willingness to speak up (see above); or some effects driven by the narratives and actual content of individual's communication  to explain the results.. (which I turn to next)



%To explore whether an intervention that updates norms \textit{without} the other aspects of the discussion can reduce discrimination, I show that simply correcting participants' beliefs about their other group members' discrimination by showing them their choices is sufficient to drive a reduction in discrimination. I show here that simply observing others' choices without a discussion leads to a (smaller) update in perceived within-group norms, and that this then leads to knock-on reductions in discrimination.

%Observing others' public choices in the first round in the \textit{No discussion (public)} arm leads to small average increases in the predicted probability that other group members will select a transgender worker in their later private choices, by \textit{correcting the underestimation of discrimination that control participants start with (\textcolor{red}{show misperception stats)}} (\autoref{tab_mech_r1_r2}, \textcolor{red}{ADD p-val and coeff}). This translates almost 1 for 1 to a reduction in one's own outcome round private discrimination of \textcolor{red}{4.5 p.p.}. Making use of the cross-sectional heterogeneity, this implies increasing the proportion of group members' choices observing one additional group member choice that selects a trans worker is associated with a 6.4 p.p. ($p$$<$0.001) (\autoref{tab_effect_of_announce_sob_discrim})

%(There are no average effects on treatment-round choices, so observing public choices in the \textit{No discussion (public)} arm is on average equivalent to observing control group's choices.)

%\textcolor{red}{Add misperceptions to the table?}

















\begin{table}[!h]
\caption{Effect of observing others' choices}
\label{tab_effect_of_announce_sob_discrim}
\centering
\resizebox{1\textwidth}{!}{
\input{../../outputs/tables/effect_of_announce_sob_discrim.tex}
}
\begin{tablenotes}
			\footnotesize
			\item  \textit{Notes}: Standard $p$-values are in brackets.
	Unit of observation is the participant $\times$ choice level. Outcome in columns 1--2 is the predicted probability that other group members select a transgender worker in private. Outcome in columns 3--4 is whether participant selected a transgender worker themselves in the outcome round. In all columns the sample is only the \textit{Observers (no discussion, public)} and the control group (\textit{No discussion, private}) in phase 2. \textit{P(others in group selected trans in treatment round)} ($\pi_{-i}$) is the proportion of times (out of a maximum of 4) that the other two participants in the group selected a transgender worker in the treatment round. All regressions control for \textit{P(selected trans in treatment round)}, the proportion of times (out of a maximum of 2) that the participant herself selected a transgender worker in the treatment round.  Other controls include stratum fixed effects; dummies for the rights videos; whether the alternative worker was shown on the right; relative \# items offered by the transgender worker; relative reliability score; and a dummy for whether the reliability score was shown.  
		\end{tablenotes}
\end{table}





\subsection{Suggestive evidence against attitude- or belief-based persuasion} 

The available descriptive evidence does not favor attitude- or belief-based persuasion. 

%None of this is conclusive, but some suggestion that persuasion (leading to changes in personal attitudes or personal normative beliefs) is a weaker channel than norm-updating.

\textbf{Attitudes}. Discussions produce only small or null effects on measures of personal attitudes towards transgender workers, indicating that effects are more likely to be operating through norm-updating than through changes in attitudes. The proportion of participants who said that (in their opinion) discrimination was wrong in response to two vignettes increased only slightly from \input{../../outputs/stats/prop_attitude_control.tex} to \input{../../outputs/stats/prop_attitude_treat.tex} ($p$ of difference: \input{../../outputs/stats/attitude_p_val.tex}, \autoref{tab_attitudes_beliefs_norms}, panel B, column 2). Controlling for this measure of private attitudes does not weaken the effects of discussion on discrimination ($p$=\input{../../outputs/stats/mediation_attitude_p.tex}, \autoref{tab_mediation}). There are also no effects on a double list experiment \citep{droitcour2004item, glynnWhatCanWe2013} that measured how many people agreed with the statement ``In general, if I see a transgender person, I walk away'' ($p$=\input{../../outputs/stats/p_val_list_experiment.tex}, \autoref{tab_attitudes_beliefs_norms}, panel B, column 1). 

\textbf{Beliefs about reliability}. Belief-based (statistical) discrimination appears to underlie some of participants' unwillingness to select transgender people, driven by negative stereotypes that portray transgender workers as unreliable. However, the effect of the discussion does not appear to be driven by changes in such statistical discrimination. The discussion does not significantly affect beliefs about the reliability of transgender workers ($p$=\input{../../outputs/stats/p_val_belief_reliability_discussion.tex}, \autoref{tab_attitudes_beliefs_norms}, panel A, column 3); controlling for these beliefs does not weaken the effect on discrimination (\autoref{tab_mediation}); and I find no evidence that the 3-person discussion reduces the belief-based component of discrimination, although I am underpowered for this latter test (\autoref{tab_statistical_discrim}, column 2).%\footnote{While the point estimate of the interaction of (\textit{Worker is trans} $\times$ \textit{Reliability score is shown} $\times$ \textit{3-person discussion}) is negative and large enough to negate the effect of (\textit{Worker is trans} $\times$ \textit{Reliability score}), I cannot reject that it is different from 0 ($p$=\input{../../outputs/stats/p_val_effect_on_statistical_discrim.tex}).}




%Belief-based (statistical) discrimination appears to underlie some of participants' unwillingness to select transgender people, driven by negative stereotypes that portray transgender workers as unreliable. Despite transgender workers having the same average reliability score as other genders, participants rate transgender workers as less likely to complete a delivery (\autoref{tab_attitudes_beliefs_norms}, panel A, column 3; discussed below). To test whether this leads to discrimination, half of the choice-pairs included information about the reliability of both workers. Revealing the reliability score makes participants \input{../../outputs/stats/statistical_discrim.tex} p.p. more likely to select a transgender worker, and this effect is unique to transgender workers (\autoref{tab_statistical_discrim}, column 1). Anti-transgender discrimination in the control group therefore appears to be partially driven by a form of inaccurate statistical discrimination \citep{bohrenInaccurateStatisticalDiscrimination2023}.

%However, the effect of the discussion does not appear to be driven by changes in such statistical discrimination. The discussion does not significantly affect beliefs about the reliability of transgender workers (\autoref{tab_attitudes_beliefs_norms}, panel A, column 3). And I find no evidence that the 3-person discussion reduces the belief-based component of discrimination, although I am not well-powered for this test (\autoref{tab_statistical_discrim}, column 2).\footnote{While the point estimate of the interaction of (\textit{Worker is trans} $\times$ \textit{Reliability score is shown} $\times$ \textit{3-person discussion}) is negative and large enough to negate the effect of (\textit{Worker is trans} $\times$ \textit{Reliability score}), I cannot reject that it is different from 0 ($p$=\input{../../outputs/stats/p_val_effect_on_statistical_discrim.tex}).}



%\textcolor{red}{ADD TABLE REFERENCE}



%I find that self-reported disapproval of discrimination is high, even in the control group. Participants were presented with two scenarios depicting instances of discrimination against transgender individuals and were asked whether the discriminator's actions were acceptable or wrong. The probability of saying that discrimination was wrong was already high in the control group (\input{../../outputs/stats/prop_attitude_control.tex}), and increased slightly in the discussion arm (\input{../../outputs/stats/prop_attitude_treat.tex}, $p$ of difference: \input{../../outputs/stats/attitude_p_val.tex}, effect size: \input{../../outputs/stats/attitude_effect_sd.tex} SD).\footnote{I also examine a double list experiment \citep{droitcour2004item, glynnWhatCanWe2013} that preserves anonymity of responses. It measured the proportion of people who agreed with the statement ``In general, if I see a transgender person, I walk away.'' (see Appendix \ref{sec_list_exp}). 20\% of the control group agreed with the anti-trans statement, and the discussion did not have a significant effect on the proportion of people who agreed with the sensitive anti-transgender statement in the list experiment (\autoref{tab_attitudes_beliefs_norms}, panel A, column 1).}
%Since the list experiment did not allow the enumerator or the researcher to infer \textit{which} statements the participant agrees with from the list of statements, it preserved the anonymity of their responses and so was less likely to be vulnerable to social desirability concerns than standard self-reported attitude questions. \textcolor{red}{COMMENT ON LEVELS??}.}
%In addition, controlling for this measure of private attitudes does not weaken the effects of discussion on discrimination (\textcolor{red}{add p-value}, \autoref{tab_mediation}).  \textcolor{red}{ADD NOTE HERE ON BELIEFSF ABOUT RELIABILITY}

%Given these results, it seems less likely that the discussion is operating by changing people's personal attitudes/personal normative beleifs / personal opinions about behavior towards trans workers, and is instead operating by changing people's beliefs about the norm. This is suggestive evidence against a persuasion channel.\footnote{}









%these may not be correlated given the differences in willingness to speak up (\autoref{}, rather than persuasiveness with regards to transgender workers, and the different patterns in willignness to speak up suggest that these may not be correlated. 


%A final piece of suggestive evidence against persuasion is that participants with group members who are rated as highly persuasive do not discriminate less, either for listeners or for 3-person discussion participants ($p$=\input{../../outputs/stats/pval_het_group_characs_listener.tex} and \input{../../outputs/stats/pval_het_group_characs_discussion.tex}, \autoref{tab_het_group}).\footnote{Tellingly, the two subcomponents of this index that do correlate with larger treatment effects are group members that are ``like a leader'' and who are ``inspiring'': both of these are plausibly individuals who are better at signalling prescriptive norms, rather than necessarily those who persuade one to change ones attitudes.}
%

%Group members' persuasiveness does not predict reductions in discrimination, again suggesting that persuasion is not the key channel. A composite index of perceived persuasiveness shows no correlation with treatment effects for either listeners or 3-person discussion participants ($p$=\input{../../outputs/stats/pval_het_group_characs_listener.tex} and \input{../../outputs/stats/pval_het_group_characs_discussion.tex}, \autoref{tab_het_group}).\footnote{The only peer characteristics correlated with larger treatment effects -- being ``like a leader'' or ``inspiring'' -- plausibly indicate group members who are better at signalling norms, rather than those who are persuasive. Close relationships between group members also do not strengthen the discussion's impact ($p$=\input{../../outputs/stats/pval_het_group_characs_discussion_close.tex}, \autoref{tab_het_group}, column 4), most likely because this correlates minimally with how similar discrimination is within a group ($p$ of difference in ICC in the control group: \input{../../outputs/stats/ri_p_icc_relation_control.tex}) or with how accurately participants predict their group members' choices ($p$=\input{../../outputs/stats/group_predic_accuracy_by_relation_p.tex} in the control group).}

%\textcolor{red}{New version}








%However, the persuasiveness score does not correlate with whether a participant speaks up in favor of transgender workers ($\rho$=\input{../../outputs/stats/ls_inv_cov_spoke_pro_trans_corr.tex}, $p$=\input{../../outputs/stats/ls_inv_cov_spoke_pro_trans_pval.tex}), so it leaves open the possibility that heterogeneity is driven by the differential propensity of pro-trans people to speak up, even if there is no difference in persuasiveness conditional on speaking up.

%Relatedly, the questions ask about persuasiveness in general, rather than about transgender workers, related to the fact that pro-trans participants are not more likely to speak up in general even if they speak up about transgender workers more.

%Finally, the two submeasures of peer characteristics that are correlated with larger treatment effects -- being ``like a leader'' or ``inspiring'' -- plausibly indicate group members who are better at signalling norms are associated with reductions in discrimination (rather than those likely to speak up in general or who are good at changing others' private attitudes).











%Score does not correlate with whether a participant speaks up ($\rho$=\input{../../outputs/stats/ls_inv_cov_spoke_pro_trans_corr.tex}, $p$=\input{../../outputs/stats/ls_inv_cov_spoke_pro_trans_pval.tex})
%
%Driven by \textit{whether} they speak up rather than how persuasive they are conditional on speaking... (\textcolor{red}{is this consistent with morality stuff})?
%
%OR not driven by individual's persuasiveness but the availability of particularly persuasive arguments regarding transgender workers (based on morality). Related to how they are not more likely to speak up IN GENERAL...
%
%The only 











%The only influential peer characteristics - being "like a leader" or "inspiring" - suggest norm-setting rather than persuasion drives behavior change.


%\textcolor{red}{Groups with closer relationships also don't have larger impacts ($p$=\input{../../outputs/stats/pval_het_group_characs_discussion_close.tex}, \autoref{tab_het_group}, column 4), most likely because this correlates minimally with how similar discrimination is in the group ($p$=\input{../../outputs/stats/ri_p_icc_relation_control.tex} in the control group or how accurately participants predict their group member's private behaviiour ($p$=\input{../../outputs/stats/group_predic_accuracy_by_relation_p.tex} in the control group)}








	
\section{Mechanisms: sources of asymmetry}
\label{sec_mechanisms_asymmetry}

%Having established that norms-based persuasion is the primary channel through which within-discussion behavior impacts post-discussion behavior, I now ask: why does the discussion lead to a large \textit{average} shift in post-discussion behavior? 




Having shown evidence in favor of norms-based persuasion as a key channel for the effects on post-discussion behavior, I now examine why discussions produce a large average shift away from discrimination. 

Standard models of communication in economics predict that communication should lead behavior to converge, and (absent misperceptions in participants' priors) should not change average behavior.\footnote{Suppose each participant \( i \) is learning about their own (dis)utility \( \theta_i \) from hiring a transgender worker. Under a standard Bayesian communication model (with a common prior and truthful information exchange), participants converge to a common posterior \citep{aumannAgreeingDisagree1976, geanakoplosWeCantDisagree1982}. The martingale property of Bayesian updating also implies that the posterior belief (and therefore post-communication behavior) will not shift on average across participants relative to the prior \textit{if} the prior is correctly specified, i.e., it accurately represents the true data-generating process. %Incorrect priors would lead to a systematic change in average beliefs if they are misspecified relative to the true data-generating process, so unbiased signals of $\theta_i$ shift the posterior away from the initial prior on average. %For example, if there is a single $\theta$ shared by all participants ($\theta_i = \theta$ for all $i$), and the realized $\theta$ differs from the group's prior mean, then the group's beliefs in expectation shift towards $\theta$.
} 
% Two types of incorrect priors would lead to a systematic change in average beliefs: (i) the prior distribution may be misspecified relative to the true data-generating process, so unbiased signals of $\theta_i$ shift the posterior away from the initial prior on average; or (ii) if there is a single $\theta$ shared by all participants ($\theta_i=\theta$ for all $i$), and the realized $\theta$ differs from its prior mean, then the group's beliefs in expectation shift towards that mean.}
The substantial reduction in discrimination therefore requires asymmetries in the discussion process. I identify three key asymmetries: pro-transgender participants are disproportionately vocal, participants fail to correctly account for this selective communication when inferring norms, and moral arguments against discrimination appear to be more persuasive than anti-trans arguments.
Much of the evidence on these asymmetries comes from heterogeneity analyses rather than exogenous mechanism treatments; these results should be interpreted with some caution.  % While these results are thus suggestive, the combination of mechanism outcomes and detailed transcript data consistently point to these three sources of asymmetry.

%\textcolor{red}{Note that the evidence on these channels is based on heterogeneity in the results, rather than exogenous mechanism treatments, so the results should be taken as suggestive. But I have extensive mechanism outcomes and transcript data that help give a detailed picture of what sources of asymmetry are plausible.
%}

%\textcolor{red}{CAVEAT: the evidence here is correlational, blabla bla, need to be careful, but use mechanism outcoems and transcript data to understand better...}






\subsection{Behavior during discussion and willingness to speak up}


The first asymmetry I document is that pro-transgender participants are more vocal than others, leading to discussions that were strongly in favor of transgender workers. Their stronger advocacy appears to be partly driven by a stronger desire to influence other group members' choices.


	

	
	
	
	
%	In this section, I show a first source of asymmetry: that participants strongly favored hiring transgender workers during the discussion, driven by pro-transgender participants' willingness to speak up more than others. This, in turn, appears partly driven by their disproportionate desire to influence others' choices. %I later explore how these patterns explain the reductions in post-discussion discrimination (Section \ref{sec_mechanisms_post_discussion}).
	
	
%		\textcolor{red}{ISSUE - the analysis of the transcripts should kind of come in the first section}
	
	
%	To answer this question, I examine both what occurred during discussions (this section) and how discussions affected subsequent behavior (Section \ref{sec_mechanisms_post_discussion}). 
	
	
	
	
	
%	In this section I show that the behavior during the discussions was very favorable towards transgender workers, that this was partly driven by the fact that pro-trans participants spoke up more and dominated the discussion, and that this was partly because they aimed to influence others to discriminate less.
	

	
	
	
%	To understand how the discussion reduced post-discussion discrimination, I will explore (i) what occurred during the discussion (this section), and then (ii) how the discussion affected post-discussion behavior (Section \ref{sec_mechanisms_post_discussion}).
	
	
%	\textcolor{red}{SEPARATE MECHANISMS INTO: (i) what goes on in the discussion, and then (ii) how does this affect post-discussion behaviour?}

%To understand how the discussion encouraged people to discriminate less \textit{after} the discussion, I first document evidence that participants exhibit pro-trans behavior \textit{during} the discussion, and that this is driven by pro-trans participants being particularly likely to speak up and to dominate the discussion. I then find suggestive evidence that this is because they aim to influence others' choices, possibly because they have a greater desire or greater ability to do so.

%I here describe how people behave towards and communicate about transgender workers \textit{during} the discussion. Broadly, the results show that participants act very positively tow

%{\color{red}ADD HIGH LEVEL DISCUSSION OF MECHANISMS}

%To understand how the discussion encouraged people to discriminate less, I first document that participants talk about transgender workers {\color{red}in a positive way \textit{during} the discussion, as measured by 3 metrics: the choices made during the discussion, the valence of statements about transgender workers, and the reasons given for selecting workers.} {\color{red}\textbf{(KAREN NOT CLEAR)}} This is in line with the evidence presented above on the emergence of a strong pro-trans norm over the course of the discussion.

\textbf{Choices during the discussion (treatment round)}. The choices made \textit{during} the discussion are favorable towards transgender workers. Participants were \input{../../outputs/stats/effect_discussion_r1.tex} p.p. more likely to select a transgender worker in the collective choices during the discussion than in the private choices made by \textit{No discussion (private)} participants (\autoref{tab_r1_main}, $p$$<$0.001). In the discussion choices, there was even positive discrimination in favor of transgender workers relative to non-transgender workers (\input{../../outputs/stats/effect_trans_discussion_r1.tex} p.p., $p$$<$0.001). %\footnote{Anti-trans discrimination is lower on average in the control conditions in the earlier treatment round compared to the later outcome round, with transgender workers being around 9 p.p. less likely to be chosen. This could be rationalized by a self-signaling model, in which some participants try to prove to themselves that they are a ``good person'' by selecting a transgender worker when they first see one, but do not feel the need to do so in later rounds.}  
%This suggests that participants persuade each other to discriminate less during the discussion, and that this spills over to later private choices. 

\textbf{Pro- and anti-trans statements in discussion.} On average, participants also communicated about transgender workers in a positive way. Based on transcripts with a sentence-level indicator of arguments in favor of or against the transgender worker, participants were \input{../../outputs/stats/ratio_pro_anti_transcripts.tex}x more likely to make pro-trans than anti-trans statements (\input{../../outputs/stats/mean_pro_transcripts.tex} of sentences were pro-trans, vs.\ \input{../../outputs/stats/mean_anti_transcripts.tex} of sentences were anti-trans, $p$ of difference \input{../../outputs/stats/pval_pro_transcripts.tex}).





%Enumerators observed the discussion and noted down (for each choice that involves a transgender worker) how many participants said something positive about the transgender worker, and how many said something negative about the transgender worker. 


%\textcolor{red}{USE TRANSCRIPTS}




%Statements about transgender workers in the discussion were typically positive: participants were \input{../../outputs/stats/ratio_pos_neg_mentions.tex}x more likely to say something positive about a transgender worker than to say something negative about them in the discussion (\autoref{fig_prob_positive_mentions}).


%{\color{red}\textbf{DISCUSSION VALENCE} - add here \input{../../outputs/stats/ratio_pos_neg_mentions.tex}}x more likely to mention positive than negative


%When shown a choice-pair that included a transgender worker, participants were more likely to say that they were making their choice explicitly \textit{because} the worker was transgender. They were also significantly more likely to cite pro-social reasons for helping a worker, for example saying that they were making their choice because they wanted to give an opportunity to that worker or to help them. This sharp increase in pro-social reasoning is driven by groups that actually chose the transgender worker. Groups were also significantly less likely to cite items, worker details, or other characteristics as the reason for making their choices when discussing a pair with a transgender worker.
%Listeners do not influence the discussion themselves, but those who hear pro-social and gender-based reasons in the 2-person discussion tend to choose transgender workers more often in the outcome round (\autoref{fig_reasons_to_listeners}). This evidence is only correlational, but it suggests that the increase in pro-social reasoning persuaded others to discriminate less. 

%The increase in pro-social reasoning also translates to participants' reasoning in the private outcome round. In phase 2, participants were asked why they made their outcome round choices. Participants who had been involved in a discussion  were more likely than those in the control arm to cite pro-social reasons for their choices (\autoref{fig_reasons_for_choices_r2_aggr} and \autoref{fig_reasons_for_choices_r2}). 

%These descriptive facts provide a potential explanation for why there might be asymmetric persuasion, i.e., why endorsing a transgender worker may be more persuasive than rejecting them. When trying to advocate for non-transgender workers, participants more frequently discuss items, the photo of the worker, and the details contained in the worker profile. But when advocating for a transgender worker, participants explicitly say they are choosing workers because they are transgender, and in order to help them.  Plausibly, these latter considerations simply outweigh the practical considerations in a debate. For example, pro-social reasoning might shift participants' decision framing away from the \textit{practical or economic} choice of which delivery worker to hire, and towards the \textit{moral} choice of helping out a discriminated group. 


%If they do outweigh the practical considerations, then if at least one person cites pro-social reasons, everyone follows. If this is true, then one would only need one person in a group to advocate for transgender workers for the whole group to be convinced, whereas if only one person was anti-transgender, they might be easily outweighed in debate. This asymmetry could explain the large overall effects of the discussion.




\textbf{Substantial minority of pro-trans participants.} A substantial minority of participants favor transgender workers despite strong average discrimination. \autoref{pref_distribution} presents individual-level estimates of willingness to pay (WTP) for transgender workers in the outcome round, derived from a regression with individual fixed effects. The distribution reveals wide variance, so despite a substantial negative WTP on average, \input{../../outputs/stats/pref_prob_control.tex} of control participants have positive WTP for transgender workers. This sizable pro-transgender faction is likely to be important for explaining the pro-transgender nature of group discussions.
	
	
	





\textbf{Pro-trans participants are more vocal.} This pro-trans pattern in the discussion appears to be partly explained by pro-trans individuals choosing to be more vocal. I use private choices after the discussion as a proxy for baseline pro-trans behavior and show that this is correlated with dominating the discussion of transgender workers (\autoref{tab_discussion_dominance}).\footnote{Since I purposely did not collect baseline measures of discrimination (to minimize priming and experimenter demand effects), the evidence here should be taken as suggestive. However, under a reasonable monotonicity assumption, the post-discussion discrimination will correlate positively with baseline discrimination: as long as participants are not too effective at persuading each other, initially anti-trans participants are unlikely to become more pro-trans than those who started off as pro-trans from the beginning.} %An additional transgender worker selected in private is associated with 
%Each additional transgender worker selected in the private outcome round is associated with being \input{../../outputs/stats/coeff_spoke_first_trans.tex} p.p. (\input{../../outputs/stats/perc_spoke_first.tex}, $p$=\input{../../outputs/stats/spoke_first_p_val.tex}) more likely to speak first when facing a choice with a transgender worker, and \input{../../outputs/stats/coeff_dominant.tex} p.p. (\input{../../outputs/stats/perc_dominant.tex}, $p$=\input{../../outputs/stats/dominant_p_val.tex}) more likely to be rated by enumerators as dominating the discussion when faced with a transgender worker. 
%
%
For each additional transgender worker a participant selected in the private round, they were \input{../../outputs/stats/coeff_spoke_first_trans.tex} p.p. more likely to speak first (\input{../../outputs/stats/perc_spoke_first.tex}, $p$=\input{../../outputs/stats/spoke_first_p_val.tex}) and \input{../../outputs/stats/coeff_dominant.tex} p.p. more likely to dominate discussions (\input{../../outputs/stats/perc_dominant.tex}, $p$=\input{../../outputs/stats/dominant_p_val.tex}) when discussing transgender workers (according to enumerator observations). No such correlation exists for non-transgender worker discussions, implying that pro-trans participants selectively spoke up about transgender workers, despite not being more talkative in general. This asymmetry could plausibly be a driver of reductions in post-discussion discrimination. %This is likely to be a key driver of the pro-trans pattern of communication in the discussion, and thus of the overall reduction in post-discussion discrimination. \textcolor{red}{FRAME IN TERMS OF ASYMEMTRY}



%This association is specific to choices that involve a transgender worker; these same participants are not more likely to dominate when faced with non-transgender choices. 


%Pro-trans participants dominated the discussions, explaining why groups moved toward less discrimination rather than more. 






% Why did discriminatory participants persuade each other to be more \textit{pro}-trans rather than more \textit{anti}-trans? 

%This implies that certain pro-trans participants, despite not being generally more talkative, act as pro-minority activists by \textit{deciding} to speak up strongly in favor of transgender workers. Anti-trans participants, on the other hand, tend to stay silent, resulting in the highly pro-trans pattern of communication documented in Section \ref{sec_beh_during_discussion} (e.g., statements about transgender workers were \input{../../outputs/stats/ratio_pos_neg_mentions.tex}x more likely to be pro-trans than anti-trans). This is likely to be a key driver of the large persuasive effects in favor of transgender workers.


\subsubsection{Why do pro-trans participants speak up more?} 

%\textcolor{red}{NEED TO MAKE THIS SECTION MORE CONCISE}

Pro-trans participants might have spoken up more because of \textit{social image concerns} (e.g., anti-trans participants self-censored because they did not want to appear discriminatory), or because they had greater \textit{influence concerns} (e.g., pro-trans participants cared more about stopping others from taking discriminatory actions) (see \citet{benabouNarrativesImperativesMoral2020} for a framework describing these motives). Suggestive evidence from mechanism outcomes indicates that influence concerns were likely more important in driving pro-trans participants' advocacy. %\textcolor{red}{I explicitly designed the experiment to allow participants to freely choose when to speak and what to say, in order to more closely resemble non-experimental discussions. While this makes it empirically challenging to understand participants' decision to speak, I can nevertheless use the mechanism treatments and outcomes to show that influence concerns were likely more important in driving pro-trans participants' advocacy.}

%
%Because I designed the experiment so that participants were able to freely choose how and what to discuss

%Here I explore why pro-trans participants appear to speak up more. While this is empirically challenging, because I wanted people to be able to freely choose to discuss & to more closely map to how open-ended discussions would take place “in the wild” , this means that choice is entirely up to the participant; means it is hard to pin down the mechanism; but nevertheless find suggestive evidence that suggests pro-trans participants are motivated by a desire to influence others' behavior rather than of signalling something about themselves. This evidence is largely correlational so is limited in what it can tell us.

%Note that at this stage I remain agnostic about \textit{how} the (e.g., through altering others' private attitudes or beliefs, altering perceived norms, etc.) I explore that in the next section.

\textbf{Social image concerns}. 
%I find no evidence that pro-trans participants' willingness to speak up is driven by social image concerns, 
There is no strong evidence for social image concerns driving pro-trans participants' disproportionate willingness to speak up. First, the \textit{No discussion (public)} treatment did not reduce discrimination in the treatment round (\autoref{tab_mech_r1_r2}, column 1, $p$=\input{../../outputs/stats/p_val_r1_public.tex}), implying that exogenously increasing social image concerns by revealing choices to other group members did not reduce discrimination.\footnote{The null effect is not driven by participant inattentiveness, because participants' choices in \textit{No discussion (public)} did react: they converged within groups, despite no mean effects ($p$-value of effect on intracluster correlation within groups: \input{../../outputs/stats/ri_p_icc_private_public.tex}).} This suggests that social image concerns are unlikely to be a key driver of pro-trans communication in the discussions, although importantly I cannot rule out that social image concerns have a stronger effect in discussions than when silently making choices (e.g., because the social costs of speaking are larger than choosing, or because of dynamically increasing social image concerns after pro-trans statements). %\footnote{%
%Pro-trans communication may be a stronger virtue signal than pro-trans choices if there is more plausible deniability when making choices without having to verbally explain them. This channel seems unlikely to be important, since the \textit{No discussion (public)} arm has no effect even when there is little plausible deniability for participants, namely, when transgender workers offer more items ($\beta$=\input{../../outputs/stats/coeff_public_dominates.tex}, $p$=\input{../../outputs/stats/p_val_public_dominates.tex}).
%It is possible that pro-trans \textit{communication} in the dsicussion is a stronger virtue signal than pro-trans \textit{choices} (for example, because there is more plausible deniability when making choices without having to verbally explain them). I cannot rule this out, although this is made somewhat less likely by the fact that the \textit{No discussion (public)} arm has no effect even when there is little plausible deniability for participants, namely, when transgender workers offer more items ($\beta$=\input{../../outputs/stats/coeff_public_dominates.tex}, $p$=\input{../../outputs/stats/p_val_public_dominates.tex}).%I also cannot rule out that virtue signaling \textit{in combination} with other mechanisms contributes to the discussion's effects, e.g., if pro-trans participants are initially more vocal for other reasons and subsequently induce other participants to virtue signal.
%} 
Second, pro-trans advocates were not more likely to report making statements ``that others would agree with'' or in order ``to look good'' in post-discussion debriefs compared to those who didn't advocate for transgender workers (\autoref{debrief_spoke_differences_bar}, bottom panels, $p$=\input{../../outputs/stats/spoke_pro_trans_dd2_pval.tex} and \input{../../outputs/stats/spoke_pro_trans_dd5_pval.tex}). 


%post-discussion debrief questions indicate that participants who spoke up in favor of transgender workers were no more likely to say that their communication  those who spoke up in favor of transgender workers during the discussion were not more likely to say that they were saying things ``in order to look good'' or ``that others would agree with'' (\autoref{debrief_spoke_differences_bar}, bottom panels).

\textbf{Influence concerns}. The evidence instead hints at an aim to persuade others to change their behavior. Participants who spoke in favor of transgender workers were \input{../../outputs/stats/spoke_pro_trans_dd3_coeff.tex} p.p.\ more likely to report trying to get others to agree with them (\input{../../outputs/stats/spoke_pro_trans_dd3_prop_increase.tex}, $p$=\input{../../outputs/stats/spoke_pro_trans_dd3_pval.tex}, \autoref{debrief_spoke_differences_bar}, top left). While these differences are only correlational and do not extend to all measures of persuasion (\autoref{debrief_spoke_differences_bar}, top right), they suggest pro-trans participants disproportionately engaged in the discussion to shape others' choices. Pro-trans participants' behavior may be more driven by influence concerns than anti-trans participants' behavior for a number of reasons: they may have a more intense preference in favor of transgender workers, or they may care more about others' behavior (e.g., if they perceive discrimination as a social wrong, rather than a personal preference). Alternatively, pro-trans participants may recognize their rhetorical advantage \citep{schkadeDeliberatingDollarsSeverity2002}: as I show below (Section~\ref{sec_transcripts}), they deploy moral arguments that may be especially compelling, thus making their active participation in the discussion more attractive.

%\textcolor{red}{SOMEWHERE - document that preferences do not seem to be skewed (that would be a another source of asymmetry}



%Participants who spoke in favor of transgender workers were \input{../../outputs/stats/spoke_pro_trans_dd3_coeff.tex} percentage points more likely to report trying to get others to agree with them (\input{../../outputs/stats/spoke_pro_trans_dd3_prop_increase.tex}, $p$=\input{../../outputs/stats/spoke_pro_trans_dd3_pval.tex}). While these differences are modest and do not extend to all measures of influence (\autoref{debrief_spoke_differences_bar}), they suggest pro-trans participants strategically engaged to shape others' choices.

%Instead, I find some moderate suggestive evidence that pro-trans participants speak up more because they aim to influence others' behavior. If, for example, pro-trans participants have a more intense preference in favor of transgender workers, care more that others select transgender workers, or are more confident in their ability to affect others' behavior to be in line with their preferences, then this would explain why pro-trans participants are more willing to speak up more. In line with this hypothesis, I find that participants who speak in favor of trans workers in the discussion are \input{../../outputs/stats/spoke_pro_trans_dd3_coeff.tex} p.p. (\input{../../outputs/stats/spoke_pro_trans_dd3_prop_increase.tex}, $p$=\input{../../outputs/stats/spoke_pro_trans_dd3_pval.tex}, \autoref{debrief_spoke_differences_bar}, top left) more likely to say they were trying to get others to agree with them. There are limitations to this evidence: the differences are relatively small, do not translate to another measure of ``influencing others'' (\autoref{debrief_spoke_differences_bar}, top right), and could be driven by unobserved baseline differences in the types of people who speak up pro-trans or not. Nevertheless the evidence is consistent with a greater propensity to try to influence others.

%I discuss later that participants were much more likely to use \textit{moral reasoning} in discussions about transgender workers, and that this seems to partially explain reductions in post-discussion discrimination. While I do not have direct evidence for this, this focus on moral suasion could underlie a form of \textit{rhetorical asymmetry} \citep{schkadeDeliberatingDollarsSeverity2002} in which it is easier to argue in favor of selecting transgender workers because one can leverage moral arguments, whereas arguing against selecting transgender workers is less persuasive. This would imply that pro-trans participants may anticipate that they are better able to influence others' discussions, and are therefore more likely to speak up in the discussions. \textcolor{red}{This is a bit hanging / overly specultaive, baseless, can I provide any evidence about this???}


%\textcolor{red}{Add summary note}











%Using the \textit{No discussion (public)} arm, I test for virtue signaling by examining whether social image concerns alone can promote pro-trans choices in a group. Participants in this arm knew others would see their choices in the treatment round, but did not discuss those choices. If virtue signaling was driving behavior, we would therefore expect more pro-trans choices in this public setting.



%\textcolor{red}{Cite the public arm not having effect in treatment round... unliekly to be virtue signalling; although caveat. Don't say this too strongly, allow the reviewers to be happy with censoring story?}

%Caveat - social image concerns for \textit{speaking up} may be stronger or weaker depending on social costs, of costs of lying, etc. I cannot directly test. Limitation is that I cannot totally rule out virtue signalling...






%\textbf{Incentive to influence by persuading or changing perceived norms; stronger preferences}


















%\textit{Model}. Why do pro-trans participants speak up even when on average their peers are predominantly discriminatory? To make progress on this question, I develop a model to explain this behaviour and the conditions under which it could occur (see Appendix \ref{sec_model}). The model starts with the insight from social signaling theory that participants strive to fit in with their group when making observable choices in social settings \citep{aschStudiesIndependenceConformity1956, benabouIncentivesProsocialBehavior2006}. This fits the observation that participants more closely match their group member's behavior in the \textit{No discussion (public)} condition (Table \ref{tab_icc}). When no persuasion is possible, as in the standard model, the only way for a pro-trans individual to fit in with an anti-trans group is to discriminate. But when I extend the model to allow participants to \textit{persuade} each other in a discussion, pro-trans people can also fit in with their group by persuading others to have pro-trans preferences. And because pro-trans people start off further from the existing discriminatory norm, they have a greater incentive to persuade others in their group. Under the right conditions, this incentive can outweigh the incentive to simply stay silent and conform to the existing anti-trans norm. 

%Furthermore, the model shows that there can be a ``sweet spot'' range where \textit{only} the pro-trans participants argue in favour of selecting a transgender worker, and anti-trans participants stay silent. This sweet spot occurs when average levels of discrimination are not too strong (otherwise no-one is even privately pro-trans) and not too weak (otherwise pro-trans participants do not have an incentive to persuade others; they are already in the majority).\footnote{The model also allows for the opposite equilibrium to occur: when attitudes are in general more pro-trans, only the anti-trans participants speak up. This mimicks the idea of ``backlash'' that can occur when people fight back once attitudes become somewhat more progressive.}

%The experiment was not designed to provide a conclusive test of this model. Nevertheless, the data are consistent with the model's prediction of a sweet spot where pro-trans participants speak up and anti-trans participants largely stay silent. For example, the 49\% of discussions that involve pro-trans statements matches the 54\% of groups in the control arm that have at least one strongly pro-trans individual (as measured by someone who selects a transgender worker twice in the outcome round).





\subsection{Incorrect inference about the norm}
	
The second source of asymmetry that could explain the large shift towards an anti-discriminatory norm is that people did not make correct inferences, tending to \textit{over-update} on pro-trans behavior and to \textit{under-update} on anti-trans behavior during the discussion. For example, if participants did not account for pro-trans speakers' disproportionate willingness to speak up, the discussion would make them overestimate their group's true support for transgender workers. This mechanism builds on theories of misperceived norms \citep{bursztynMisperceivedSocialNorms2020}. But here, the norm is misperceived because of the conversation itself. I find two main pieces of suggestive evidence of such incorrect inference.
%For participants to correctly estimate how pro-trans their peers were, they would have to correctly account for their differential willingness to speak up. \textcolor{red}{Instead, I find evidence that they do not.}

\textbf{Silence neglect}. Participants appear to have neglected the informational value of silence during discussions, accentuating updates toward pro-transgender norms. While participants strongly updated their beliefs after hearing pro-transgender statements, they failed to interpret silence as evidence of anti-transgender attitudes. When no group members made pro-transgender statements, participants' predictions about their peers' choices remained indistinguishable from control group predictions ($p$
of difference: \input{../../outputs/stats/het_by_posneg_discussion_full_predic_intercept.tex}, \autoref{het_by_posneg_discussion_predictions}). This implies that participants did not interpret silence as a signal of anti-trans attitudes, even though silence often indicated opposition rather than indifference to selecting transgender workers. The failure to interpret silence as informative carried through to post-discussion behavior: 3-person discussion participants who heard no pro-trans statements did not increase discrimination relative to the control group (\autoref{het_by_posneg_discussion}).\footnote{For listeners, the raw data suggest that discussions with no pro-trans statements may have slightly decreased the predicted and actual probability of choosing a trans worker (\autoref{het_by_posneg_discussion} and \autoref{het_by_posneg_discussion_predictions}, right panels). The intercept is not significantly different from 0 in either case when controls are included ($p$=\input{../../outputs/stats/het_by_posneg_discussion_listener_pval.tex} and \input{../../outputs/stats/het_by_posneg_discussion_full_predic_intercept.tex}), but the coefficient is negative. While the sample size of listeners is small, this may indicate \textit{partial} rather than total silence neglect.} This evidence is in line with theories of the ``spiral of silence'' in which people come to believe that a view is more prevalent than it truly is because of selective communication \citep{noelle-neumannSpiralSilenceTheory1974, huangBreakingSpiralSilence2023}, and work on selection neglect showing that people fail to discount how information is filtered before reaching them \citep{enkeWhatYouSee2020}.\footnote{Unlike the standard spiral-of-silence mechanism (that emphasizes social image concerns), participants' desire to persuade others appears to be the primary driver of selective communication in this context.} 




%I find evidence of silence neglect that accentuates norm updates: participants only update towards a pro-trans norm when people speak up in favor of transgender workers, but fail to account for silence from other participants that may indicate anti-trans attitudes.  When no other group members make pro-trans statements during discussions, participants predict their group members will select transgender workers at rates similar to the control group ($p$ of difference: \input{../../outputs/stats/het_by_posneg_discussion_full_predic_intercept.tex}, \autoref{het_by_posneg_discussion_predictions}). This implies that participants do not interpret silence as a signal of anti-trans attitudes, even though silence likely indicates opposition rather than indifference to selecting transgender workers. The failure to interpret silence as informative carries through to post-discussion behavior: 3-person discussion participants who hear no pro-trans statements do not increase their discrimination relative to the control group (\autoref{het_by_posneg_discussion}).\footnote{For listeners, the raw data suggests that discussions with no pro-trans statements may slightly increase predicted group members' discrimination and own private discrimination (\autoref{het_by_posneg_discussion} and \autoref{het_by_posneg_discussion_predictions}, right panels). The intercept is not significantly different from 0 in either case when controls are included ($p$=\input{../../outputs/stats/het_by_posneg_discussion_listener_pval.tex} and \input{../../outputs/stats/het_by_posneg_discussion_full_predic_intercept.tex}), but the coefficient is negative. 
While the sample size of listeners is small, this possibly indicates \textit{partial} rather than total silence neglect, where silence is underweighted but not completely discounted by participants.} This evidence is in line with theories of the ``spiral of silence'' in which people come to believe that a view is more prevalent than it truly is because of selective communication \citep{noelle-neumannSpiralSilenceTheory1974, huangBreakingSpiralSilence2023}.\footnote{The standard spiral of silence theory presumes that people selectively communicate because of social image concerns; I show above that persuasion concerns are a more likely driver in this context.} \textcolor{red}{CITE BEN ENKE STUFF?}

%I also find more direct evidence that participants neglect the informational content of silence when updating their beliefs about group norms. 










% \textcolor{red}{Make the link to the asymmetric willingness to speak up too... (if they correctly adjusted for this asymmetry then it wouldn't shift things...}


%\textcolor{red}{MOVE THIS DOWN}:

%\textcolor{red}{Along with the evidence of pro-trans participants' greater willingness to speak up, it also suggests a specific type of norm-updating based \textit{generating} a misperception. If participants do not account for pro-trans speakers' disproportionate willingness to speak up, the discussion will make them overestimate their group's true support for transgender workers. 






%\textcolor{red}{NEXT TWO PARAS ALL MOVE:::}

\textbf{Overcorrection of misperception}. The evidence is consistent with the discussion's effects partly stemming from correcting---and then overcorrecting---initially misperceived norms. I do find evidence of an initial misperception: the control group underestimated how often their peers would privately choose transgender workers (by \input{../../outputs/stats/misper_diff_control.tex} p.p., $p$\input{../../outputs/stats/pval_misper_control_alt.tex}, \autoref{fig_group_predic_phase2}). But discussions did more than fix this misperception---they created a new one in the opposite direction. After discussions, participants overestimated their peers' selection of transgender workers by \input{../../outputs/stats/misper_diff_treat.tex} p.p.\ ($p$=\input{../../outputs/stats/pval_misper_treat_alt.tex}). %\footnote{Similarly, people overestimate the pro-trans choices of speakers in the 2-person discussion arm ($p$=\input{../../outputs/stats/pval_misper_pair_speakers.tex}, \autoref{fig_group_predic_phase2}).} 
This pattern is also consistent with a ``spiral of silence'' mechanism: pro-trans participants speak up more, other participants do not sufficiently account for this, and thus those other participants overestimate support for transgender workers among their peers.






%The discussion's effects could partially stem from \textit{correcting} an initially misperceived norm. If participants initially overestimate how discriminatory their peers are, they may realize that their peers are not as discriminatory as they thought during the discussion. I do find evidence of an initial misperception: the control group underestimated how often their group members privately chose a transgender worker (\input{../../outputs/stats/misper_diff_control.tex}p.p., $p$\input{../../outputs/stats/pval_misper_control.tex}). Yet discussions did more than correct this misperception - they generated a new one favoring transgender workers, with participants overestimating selection by 2.9 p.p. after the discussion ($p$=0.10).\footnote{Similarly, people overestimate the pro-trans choices of speakers the 2-person discussion arm ($p$=0.02, \autoref{fig_group_predic_phase2}).}
% \textcolor{red}{The creation of pro-trans misperceptions, consistent with theories of the spiral of silence (\textcolor{red}{refs}), helps explain why discussions reduced discrimination more than a simple correction of initial misperceptions would predict.} (\textcolor{red}{The creation of a misperception is also consistent with the evidence that pro-trans participants speak up more, and the evidence of silence neglect.
%})
%This is effect is also consistent with a ``spiral of silence'' mechanism, in which pro-trans participants speak up more, participants do not sufficiently account for this, and thus they overestimate how pro-trans their group is. 

%\textcolor{red}{Note that some of the effect could be driven by correcting the misperception}


%\textcolor{red}{Make clear that some of the effect could come from the correction of the norm, and the rest is from generating a misperception throughout the course of the discussion}





%
%
%\textbf{Ratcheting misperceptions???}
%
%\textcolor{red}{A bit confused here -- in the observer graph, misperception is ``behind'' the actual behavior}
%
%\textcolor{red}{But in discussion graph, mispercepton is ``ahead'' of actual behavior}
%
%	
%	
%	
	
%\textcolor{red}{again suggesting that the discussion does not \textit{only} correct a misperceived social norm but instead generates a misperception. This misperception is likely driven by pro-trans participants' differential willingness to speak up, participants' silence neglect, and perhaps (as discussed in Section \ref{sec_transcripts}) the effects of actual content of the discussion beyond simply the choices made.}
	
	
%\section{OLD Mechanisms: behavior during the discussion} 
%	\label{sec_beh_during_discussion}
	
%	How did group discussions reduce discrimination? To answer this, I will (i) explore what behavior occurred during the discussion (this section); (ii) use mechanism outcomes and treatments to assess why the behavior during the discussion affected post-discussion behavior (Section \ref{sec_mechanisms_post_discussion}); and (iii) use detailed discussion transcripts to unpack the black box of the discussion, evaluating how the \textit{content} of the discussion is associated with post-discussion discrimination (Section \ref{sec_transcripts}). 	
	
%	assesses a number of alternative explanations for the results that are not supported by the data.} \textcolor{red}{POSSIBLY ADD A SENTENCE ON PREFERRED EXPLANATION (as signposting)}
	
%	\textcolor{red}{Finally, I provide evidence against a number of alternative explanations of the results, including (i) other characteristics of the photos of workers, such as perceived caste; (ii) social image concerns that affect even private, post-discussion choices; (iii) increased contemplation about choices; (iv) experimenter demand effects or social desirability bias; (v) increased salience of the notion of transgender; and (vi) cheap talk due to low stakes.}
	
	
	


\subsection{Rhetorical asymmetry and moral arguments}
\label{sec_transcripts}

The discussion transcripts reveal a third source of asymmetry: the types of arguments participants used, and how persuasive they appear to be. Pro-trans advocates more frequently invoked moral concepts (like equality, rights, and opportunity) when discussing transgender workers, and this moral framing is strongly associated with lower post-discussion discrimination. While this pattern is only correlational, it suggests a form of \textit{rhetorical asymmetry} \citep{schkadeDeliberatingDollarsSeverity2002} in which pro-transgender arguments were more persuasive than anti-transgender ones. The pattern emerges consistently across multiple analyses: a sentence-level transcript analysis, AI-generated hypotheses about discussion characteristics, and enumerator observations of discussions. The relationship holds for silent listeners, for whom causality is more plausible, and even after controlling for within-discussion choices. These findings align with psychological research on moral reframing, which shows that arguments framed in moral terms are particularly persuasive when they align with listeners' values \citep{voelkelMorallyReframedArguments2018, feinbergMoralReframingTechnique2019}. Combined with the evidence on norm-updating, this suggests that moral arguments may serve as powerful signals of anti-discriminatory norms.




%This use of persuasive moral argumentation seems to underly a form of \textit{rhetorical asymmetry} (\textcolor{red}{CITE KAHEMAN SCHADKE ETC.}), in which pro-trans arguments are simply more persuasive than anti-trans arguments. 

%\textcolor{red}{Rhetorical asymmetry (Kaheman et al) -- cite here...}




%\textcolor{red}{MAKE THIS SECTION MORE CONCISE}





%\textbf{One prima facie piece of evidence...}





%
%
% \textcolor{red}{Finally, I open the black box of the discussion to understand whether the content of the discussion mattered above and beyond simply observing others select transgender workers. Indeed, in line with a psychology literature on \textit{moral reframing} that finds that people can be more influenced by messages that are framed in a moral sense when it aligns with their moral values, I find that when participants are part of a discussion where morality is invoked they discriminate significantly less (and that this does not operate solely by changing the chocies during the discusison). While this evidence is only correlational, it does suggest that the content of the discussion matters. In combination with the evidence on norm-updating, it suggests that discussions of morality are likely to act as a signal of a strong anti-discriminatory norm, and that this leads to participants discriminating less after the discussion. 
%}
%
%This fits with work in psychology suggesting that morally reframing arguments can increase how effective they are \citep{voelkelMorallyReframedArguments2018, feinbergMoralReframingTechnique2019}.
%
%This section uses the discussion transcripts to show that the content of discussions appears to have mattered beyond \textcolor{red}{simply observing others' choices}. Discussions about transgender workers more frequently invoke notions of \textit{morality} (e.g., discussing equality, rights, or giving opportunities to workers).
%
%Moreover, in line with a psychology literature on \textit{moral reframing} that finds that people can be more influenced by messages that are framed in a moral sense when it aligns with their moral values voelkelMorallyReframedArguments2018, feinbergMoralReframingTechnique2019, I find that when participants are part of a discussion where morality is invoked they discriminate significantly less (and that this does not operate solely by changing the chocies during the discusison)
%
%
%
% and this predicts lower post-discussion discrimination. This pattern emerges across multiple analyses: a sentence-level transcript analysis, AI-generated hypotheses about discussion characteristics, and enumerator observations of discussions. The relationship holds for silent listeners, for which causality is more plausible, and even after controlling for within-discussion choices. These patterns thus indicate that the discussion matters not only because it allows participants to observe others' choices, but because they make arguments to each other that act as signals of the social norms. \textcolor{red}{clearer link to norm-updating}
%
%This is in line with work in psychology on \textit{moral reframing} 
%
%
%\textcolor{red}{ADD STUFF ON MORAL REFRAMING LITERATURE}
%





%Groups that discuss morality show larger reductions in discrimination even after controlling for within-discussion choices. The relationship holds for silent listeners, suggesting moral content causally affects behavior rather than merely reflecting group composition. These patterns indicate discussion content shapes behavior beyond choice observation, either by signaling stronger prescriptive norms or by changing private normative beliefs.



%Here I document evidence that the actual \textit{content} of the discussion matters, above and beyond simply observing choices made by others, people are talking to each other in a way that changes either their private normative beliefs or their beliefs about the prevailing prescriptive norm of behaviour towards transgender workers.

%I first document patterns showing that discussions about transgender workers are more likely to invoke notions of ``morality'', and that such discussion appear to be associated with lower post-discussion discrimination. This pattern holds when analyzing at the sentence-level in the transcripts, when testing AI-generated hypotheses at the entire discussion level, and when examining enumerator-marked observations. I then more directly test the hypothesis that morality matters by rating each discussion on how much it invokes morality and examining whether this predicts reductions in post-discussion discrimination even after controlling for within-discussion choices. Indeed morality is highly predictive of post-discussion discrimination. While these patterns of heterogeneity are not driven by exogenous variation, the fact that the pattern holds for listeners for the enumerator observations (who have no opportunity to actively influence the discussion) suggests that the associations are likely to partly measure causal effects.

%The fact that discussions of morality appear to be associated with lower post-discussion discrimination indicates that the content of the arguments in the discussion is important, above and beyond simply observing others’ choices. That content could be affecting post-discussion choices through either of the main channels above: discussing morality could act as a signal that the prescriptive norm of behaviour towards trans workers is less discriminatory, or it could directly persuade participants to change their private normative beliefs.



\textbf{Transcript sentences}. Sentence-level semantic analysis of the 3-person discussions suggests that moral language predicts reductions in post-discussion discrimination. 
Using OpenAI's \textit{text-embedding-3-small} model, I use $k$-means clustering to classify sentences based on similarity into 20 groups (\autoref{transcript_cluster_quotes}, see Appendix~\ref{sec_appendix_transcript_sentence} for methodology). Sentences about ``giving opportunities'' to a worker and sentences that explicitly acknowledge that a worker is transgender are especially common for choices involving a transgender worker.\footnote{Indeed, the transcript quotes that were \textit{most} uniquely representative of transgender discussions (relative to non-transgender discussions, based on relative cosine similarity of embeddings) were ``\textit{\input{../../outputs/stats/top_quote.tex}}'' and ``\textit{\input{../../outputs/stats/top_quote_no_trans.tex}}''.
Participants were especially likely to use words like ``opportunity'', ``chance'', and ``rights'' (see word cloud in \autoref{fig_word_cloud}).}
These sentences also strongly predict selecting transgender workers privately after the discussion: a 1 SD increase in ``giving opportunities'' sentences corresponds to a \input{../../outputs/stats/coeff_transcript_cluster_1.tex} p.p.\ decrease in post-discussion discrimination ($q$=\input{../../outputs/stats/pval_transcript_cluster_1.tex}, cluster 1), and a 1 SD increase in explicitly acknowledging transgender identity predicts a \input{../../outputs/stats/coeff_transcript_cluster_2.tex} p.p.\ increase in selecting transgender workers ($q$=\input{../../outputs/stats/pval_transcript_cluster_2.tex}, cluster 2). In contrast, discussions that focused on workers' appearance predict weaker treatment effects ($\beta$=\input{../../outputs/stats/coeff_transcript_cluster_12.tex} p.p., $q$=\input{../../outputs/stats/pval_transcript_cluster_12.tex}, cluster 3), in line with stereotypes portraying transgender people as looking strange or untrustworthy.
%\footnote{The most representative quotes were calculated by: (i) extracting the embeddings of each quote from the transcript data using OpenAI's \textit{embedding-3-small} model; (ii) separately calculating the mean embedding of discussions that do and don't involve transgender workers; (iii) taking the quote that maximizes the difference between cosine similarity with the average transgender discussion and the cosine similarity with the average non-transgender discussion. The second quote is the maximum that excludes an explicit mention of the word ``transgender''.}.
%Sentences about ``giving opportunities'' to a worker strongly predict selecting transgender workers after discussions: a 1 SD increase in such statements corresponds to a \input{../../outputs/stats/coeff_transcript_cluster_1.tex} p.p. increase in selecting transgender workers after the discussion ($q$=\input{../../outputs/stats/pval_transcript_cluster_1.tex}, cluster 1). Similarly, explicitly acknowledging workers' transgender identity predicts a \input{../../outputs/stats/coeff_transcript_cluster_2.tex} p.p. increase ($q$=\input{../../outputs/stats/pval_transcript_cluster_2.tex}, cluster 2).  In contrast, discussions that focused on workers' appearance predict weaker treatment effects ($\beta$=\input{../../outputs/stats/coeff_transcript_cluster_12.tex}, $q$=\input{../../outputs/stats/pval_transcript_cluster_12.tex}, cluster 3).

%Indeed, the transcript quotes that were \textit{most} uniquely representative of transgender discussions (relative to non-transgender discussions) were ``\textit{\input{../../outputs/stats/top_quote.tex}}'' and ``\textit{\input{../../outputs/stats/top_quote_no_trans.tex}}''.\footnote{The most representative quotes were calculated by: (i) extracting the embeddings of each quote from the transcript data using OpenAI's \textit{embedding-3-small} model; (ii) separately calculating the mean embedding of discussions that do and don't involve transgender workers; (iii) taking the quote that maximizes the difference between cosine similarity with the average transgender discussion and the cosine similarity with the average non-transgender discussion. The second quote is the maximum that excludes an explicit mention of the word ``transgender''.}.





%I analyze the written transcripts that are available for the \input{../../outputs/stats/prop_audio_consent_yes.tex} of \textit{3-person discussion} transcripts who consented. \textcolor{red}{Note - consent is uncorrelated with outcomes}.

%I first analyze at the sentence level, looking at the types of sentences that are correlated with discussions about transgender workers and lower subsequent discrimination.

%To do this, I calculate the semantic embeddings of each sentence in the transcripts using OpenAI's \textit{embedding-3-small} model, and use k-means clustering to classify each sentence said by a respondent into one of 20 clusters that represent sentences with similar meanings.

%I then examine the proportion of respondent sentences that fit in each cluster for discussions that include a transgender worker option, and those that don't (\autoref{transcript_cluster_quotes}). I also calculate the association between discussions containing those utterances, and the probability of discriminating in private post-discussion choices. 

%When the discussion mentions wanting to ``give [a worker] opportunities'', pariticipants are much less likely to discriminate after the discussion (a 1 SD increase in the proportion of times this type of sentence is mentioned is associated with a \textcolor{red}{6.8 p.p.} ($q$=\textcolor{red}{XXX}, cluster 1) increase in the probability of selecting a trans worker in the post-discussion choices. Groups who explicitly mention that a worker is transgender are also substantially less likely to discriminate after the discussion ($\beta$=5.7 p.p., $q$=\textcolor{red}{XXX}, cluster 2). By contrast, groups who focus on a worker's appearance or include statements where participants simply agree with a previous participant are less likely to select trans workers privately after the dsicussion.



\textbf{Broader transcript features}. To analyze higher-level characteristics of the discussion that are correlated with reduced discrimination, I follow recent work that uses AI to generate hypotheses  \citep{ludwigMachineLearningTool2024a, batistaWordsThatWork2024}. Specifically, I (i) use a large language model (LLM) to generate 500 hypotheses about the main difference between two randomly selected transcripts (without revealing any post-discussion outcomes); (ii) use an LLM to rate all transcripts based on how much they satisfy each hypothesis; (iii) combine hypotheses whose ratings are highly correlated using factor analysis; and (iv) test whether the aggregate rating is correlated with post-discussion discrimination (see Appendix \ref{sec_appendix_transcript_hypothesis_generation}).

Using this method, I find that discussions in which the transgender worker's identity is explicitly acknowledged, and discussions with a stronger focus on social equity and support for marginalized individuals, are strongly associated with less discrimination ($q<0.001$, \autoref{fig_hypothesis_effects_fct}, rows 1 and 2). A focus on gender identity and on grocery items during the transgender discussion is also associated with less post-discussion discrimination ($q<0.05$, rows 3 and 4). In contrast, discussions about transgender workers that focus on other details about the worker (e.g., their language proficiency or delivery performance) are associated with weaker treatment effects ($q$=\input{../../outputs/stats/pval_hypothesis_effects_fct_language_proficiency.tex} and \input{../../outputs/stats/pval_hypothesis_effects_fct_delivery_performance.tex}), in line with the view that moral arguments are more persuasive than practical ones in this context. Notably, there do not seem to be moral \textit{anti}-transgender arguments that are persuasive: anti-trans participants only resort to details about the workers' qualifications.



%To understand if there are higher-level features of the discussions that predict post-discussion discrimination, I follow a burgeoning literature that uses AI to generate hypotheses \citep{ludwigMachineLearningTool2024a, batistaWordsThatWork2024} and use an LLM to (i) generate 500 hypotheses about the main difference between two randomly selected transcripts (without revealing that the goal is to predict discrimination, or revealing which transcript is associated with lower post-discussion discrimination); (ii) rate all transcripts based on how much they align with the hypotheses; (iii) evaluate whether this rating is correlated with changes in discrimination in the private outcome round (see more detail \textcolor{red}{Appendix Section XX}).

%This exercise shows that discussions where the transgender worker's identity is explicitly acknowledged and with a stronger focus on social equity and support for marginalized individuals is strongly associated with less discrimination. A focus on gender identity and on grocery items during the transgender discussion is also associated with less post-discussion discrimination. By contrast, focusing on other details about the worker (language proficiency or delivery performance) is associated with more post-discussion disicrimination \textcolor{red}{need to explain a bit??}

\textbf{Enumerator observations}. Enumerators' observations are consistent with this picture. When discussing transgender workers, participants cite moral rationales (e.g., wanting to give an opportunity to or help the worker; saying that the worker is ``also human'') more frequently than in discussions of non-transgender workers (difference = \input{../../outputs/stats/effect_pro_social.tex} p.p., $p$\input{../../outputs/stats/effect_pro_social_p.tex}, \autoref{fig_reasons_r1_categories}). And these moral rationales correlate with lower discrimination after the discussion: listeners who heard them were \input{../../outputs/stats/rsn_pro_social.tex} p.p. more likely to select transgender workers afterward ($p<0.001$).\footnote{The increase in pro-social reasoning carries over to participants' reported reasoning in the private outcome round. When participants were asked why they made their outcome-round choices, those who had been involved in a discussion were \input{../../outputs/stats/coeff_discussion_pro_social.tex} p.p. (\input{../../outputs/stats/perc_discussion_pro_social_ratio.tex}) more likely than the control group to cite pro-social reasons for their choices ($p$=\input{../../outputs/stats/p_val_discussion_pro_social.tex}).}




%I also confirm this picture using enumerator observations: when faced with a choice involving a transgender worker, enumerators recorded that participants were significantly more likely to cite moral rationales (e.g., wanting to give an opportunity to or help the worker; saying that the worker is ``also human'') for their choices during the discussion according to enumerator observations, see \autoref{fig_reasons_r1_categories}, difference compared to non-transgender choices: \input{../../outputs/stats/effect_pro_social.tex} p.p., $p$\input{../../outputs/stats/effect_pro_social_p.tex}).
 
% There is some correlational evidence that the shift towards moral reasoning may \textcolor{red}{\textit{persuade}} others to discriminate less in the outcome round. First, the ``listeners" in the 2-person discussion arm were \input{../../outputs/stats/rsn_pro_social.tex} p.p. ($p$$<$0.001) more likely to choose transgender workers in the outcome round if they heard pro-social reasons in the discussion. 



  

\textbf{Moral discussions}. The evidence above suggests that discussions focusing on giving opportunities, social equity, or other moral notions are associated with reductions in post-discussion discrimination, in keeping with the psychological literature on moral reframing \citep{voelkelMorallyReframedArguments2018, feinbergMoralReframingTechnique2019}. To more directly test the hypothesis that moral reframing reduces discrimination, I use an LLM to rate how much each discussion transcript invoked morality on a 0--1 scale (see Appendix \ref{sec_appendix_transcript_morality}).
%
%
% discussions were using the transcripts of the discussion using a prompt that asks them to rate it based on ``Analyze the transcript by rating how much the discussants invoke morality in their decision-making process on a scale from 0 to 1.'' (\textcolor{red}{see details in Appendix XXX}).
%
 Participants used moral language substantially more when discussing transgender workers (from \input{../../outputs/stats/morality_mean_non_trans.tex} to \input{../../outputs/stats/morality_mean_trans.tex}, +\input{../../outputs/stats/morality_means_diff.tex} SD, $p$\input{../../outputs/stats/morality_means_diff_p.tex}). 
% 
 And this moral language is highly predictive of post-discussion discrimination: a 1 SD increase in morality rating corresponds to being \input{../../outputs/stats/morality_rating_trans.tex} p.p. more likely to select transgender workers after the discussion ($p$$<$0.001, \autoref{models_morality}, column 1), even when controlling for general pro-trans statements (column 2) and choices (column 3). While this evidence is only correlational, it suggests that the moral content of discussions may reduce discrimination independently of the within-discussion choices.
% 
Moral language moreover appears to operate through the \textit{norm-updating} channel described above, i.e., by affecting participants' perceptions of the norm of behavior towards transgender workers: a discussion that is 1 SD more moral is also associated with larger updates in within-group norms (\input{../../outputs/stats/morality_rating_trans_sob.tex} p.p. when controlling for treatment round choices, $p$=\input{../../outputs/stats/morality_rating_trans_sob_p.tex}).
 
 
% \textcolor{red}{}
 
 
 
% \textcolor{red}{Morality also updates SOBs a lot...}}--- likely to be updating norms
 
 
 
 
 \textbf{Heterogeneity on persuasiveness}. One piece of suggestive evidence against persuasion and rhetorical asymmetry is that participants who rate their group members as more persuasive in a pre-discussion question do not discriminate less afterward. Neither listeners nor discussion participants discriminate less when their peers score higher on a pre-discussion persuasiveness index ($p$=\input{../../outputs/stats/pval_het_group_characs_listener.tex} and \input{../../outputs/stats/pval_het_group_characs_discussion.tex}, \autoref{tab_het_group}). This measure has limitations: it captures perceptions of a persuasive personality before the discussion, rather than transgender-specific advocacy or the likelihood of speaking up. The null result thus does not preclude effects stemming from pro-trans participants being more willing to speak up or from selecting more persuasive moral arguments specifically when discussing transgender workers. Nevertheless, the null result suggests caution in attributing effects entirely to rhetorical asymmetry.
 
% While this measure captures general persuasiveness rather than transgender-specific advocacy or likelihood of speaking up, the null result suggests caution in attributing effects entirely to rhetorical asymmetry.
 
% A limitation of this measure is that it focuses on \textit{participant-level} persuasiveness, and does not focus on transgender-specific argumentation or the probability of being vocal. It therefore does not rule out increased persuasion being driven by a disproportionate willingness to speak up (rather than being more persuasive conditional on speaking up), or by the arguments specific to arguing against discirmination (such as all the moral arguments documented above). Nevertheless, the lack of heterogeneity means we should be cautious about the evidence in favor of rhetorical asymmetry.
 
 
 
 
 
 
 
% While this challenges persuasion-based explanations, three caveats apply. First, participants with high persuasiveness scores do not advocate for transgender workers more often ($\rho$=\input{../../outputs/stats/ls_inv_cov_spoke_pro_trans_corr.tex}, $p$=\input{../../outputs/stats/ls_inv_cov_spoke_pro_trans_pval.tex}), so persuasion could be occuring through differences in speaking up, rather than through differences in persuasiveness conditional on speaking up. Second, the index measured general rather than transgender-specific persuasiveness, but a general propensity to speak up is not correlated with reduced discrimination (\autoref{tab_discussion_dominance}). Finally, there are two sub-measures that do correlate with larger treatment effects -- being ``like a leader'' or ``inspiring'' ($p$=\input{../../outputs/stats/ls3_z_other_spoke_pro_trans_pval.tex} and \input{../../outputs/stats/ls9_z_other_spoke_pro_trans_pval.tex}) -- and these plausibly indicate group members who are better at signalling norms (rather than those who are likely to speak up), leaving open the possibility that norm-based persuasion is driving the results.


%\textcolor{red}{EXPLANATION - persasuveness is not really at the individual level-- more at the argument level. It's being persuasive about this thing...}

 
 
 
 
 
 
 
 
%Discussions including a transgender worker option are \input{../../outputs/stats/morality_means_diff.tex} SD ($p$\input{../../outputs/stats/morality_means_diff_p.tex}) more moral based on this rating.
 
% I then show that how moral the discussions are about transgender workers is highly predictive of post-discussion discrimination. Participants involved in a discussion about trans workers that is 1 SD more moral are \input{../../outputs/stats/morality_rating_trans.tex} more likely to 
%  (\autoref{models_morality}, $p$$<$0.001, column 1) select trans workers in the private outcome round of hiring. This effect is only for discussions about trans workers, not non-trans workers.\footnote{  \textcolor{red}{Morality also updates SOBs a lot...}} The correlation is still significant when controlling for how often other group members spoke in favour of trans workers (column 2), and for how many times the group chose trans workers in the discussion (column 3), suggesting that discussions of morality reduce discrimination above and beyond simply through the discussion choices. \textcolor{red}{I interpret this as evidence that the content of the discussion has meaningful effects beyond the choices.}
  

%\textcolor{red}{NOTE - this is all heterogeneity, not causal effects, so need to be cautious, but heterogeneity is consistent with the idea that content of the discussions matter.}
  
   

\begin{table}[!htbp]
\caption{Morality rating of discussion predicts reduced post-discussion discrimination}
\label{models_morality}
\centering
\resizebox{1\textwidth}{!}{
\input{../../outputs/tables/models_morality}
}
\begin{tablenotes}
	\footnotesize
\item	\textit{Notes}: Sample includes \textit{3-person discussion} and \textit{No discussion (private)} participants, so the control group is the omitted category. \textit{Discussion morality score} is the morality score of the discussion transcripts rated by an LLM, designed to measure how much morality is invoked (see Section \ref{sec_transcripts} and Appendix \ref{sec_appendix_transcript}). Each discussion-choice transcript receives a rating. I average these ratings for each group's transgender and non-transgender choices separately, normalizing to a mean of 0 and a standard deviation of 1. \textit{Others in group: proportion of discussions spoke in favor of trans} is the proportion of times (out of a maximum of 4) the other group members spoke positively about a transgender option. \textit{Num. times group chose trans in treatment round} is the number of times a transgender worker was chosen by the group in the treatment round. All 4 of these additional predictors are coded as 0 for the control group. 
%\item \textcolor{red}{Notes: omitted category is control group, coded as 0 for predictors. Only available for 3-person discussions. Is morality score normalized separatly for trans and non-trans or not? LASSO controls etc. included}. Mean discussion morality score is \input{../../outputs/stats/morality_mean_trans.tex} for transgender choices and \input{../../outputs/stats/morality_mean_non_trans.tex} for non-transgender choices. For the table, I normalize by the SD of the non-trans choices.
\end{tablenotes}
\end{table}
 












 
 




%There is some correlational evidence that the shift towards pro-social reasoning may \textit{persuade} others to discriminate less in the outcome round. First, the ``listeners" in the 2-person discussion arm were \input{../../outputs/stats/rsn_pro_social.tex} p.p. ($p$$<$0.001) more likely to choose transgender workers in the outcome round if they heard pro-social reasons in the discussion. Second, the increase in pro-social reasoning translates to participants' reported reasoning in the private outcome round. When participants were asked why they made their outcome-round choices, those who had been involved in a discussion were \input{../../outputs/stats/coeff_discussion_pro_social.tex} p.p. (\input{../../outputs/stats/perc_discussion_pro_social_ratio.tex}) more likely than the control group to cite pro-social reasons for their choices ($p$=\input{../../outputs/stats/p_val_discussion_pro_social.tex}).



















%\textbf{Enumerator observations}. 


%\textbf{Transcript data}.













%\newpage


\begin{comment}
	

\section{\textcolor{red}{Mechanisms: What is behind the effect of the discussion?}}

%{\color{red}CUT THIS SECTION}

Having documented the main results and pro-trans behavior during the discussion, I now seek to understand \textit{why} horizontal communication between privately discriminatory individuals led to strong reductions in discrimination. I examine three candidate mechanisms that could explain this dynamic:
\begin{enumerate}[(1)]
\itemsep-0.5em
  \item \textit{Correcting a misperceived norm}. Participants may initially overestimate how discriminatory their peers are. When they communicate, they realize that their peers are not as discriminatory as they thought, and so subsequently feel more comfortable selecting a transgender worker.
  \item \textit{Virtue signaling}. Participants want to \textit{appear} to be a ``good person'', i.e., not to be discriminatory, in a group setting. They therefore act positively towards transgender workers in the discussion and in doing so encourage others to discriminate less afterwards.
  \item \textit{Persuasion and decision to speak up}. Participants change each other's preferences for selecting a transgender worker by sharing persuasive narratives. People who are more pro-trans are more vocal in discussions, leading groups to become overall less discriminatory.
\end{enumerate}
Below, I document the evidence that channels (1) and (2) are not sufficient to explain the large effects of the discussion, whereas channel (3) is supported by the data and could explain large effects. %I show that the persuasion channel could be driven by pro-trans participants choosing to be more vocal in discussions and persuading others not to discriminate.
% I first show evidence against a \textit{misperception} mechanism, in which participants 




\textcolor{red}{The implied \textit{persuasion rate} is large: \input{../../outputs/stats/persuasion_rate.tex} of listeners who would not otherwise have chosen transgender workers change their behavior. This is at the high end of the values seen in the literature on persuasion \citep{dellavignaPersuasionEmpiricalEvidence2010} \textcolor{red}{but note that it is plausible, in line with persuasion effects of in person things}. }


%The evidence thus far has shown that a strong pro-trans norm emerges 

%In this section, to further understand the emergence of a strong pro-trans norm in the group discussion, I first examine participants' behavior during the discussion itself. I then analyze the effects of the phase 2 mechanism treatments, which show that (i) listening to a discussion is just as effective at reducing discrimination as participating in one, and (ii) increased social image concerns from being in a group are not sufficient to explain the effects of the discussion. 





\subsection{Correcting a misperceived norm}
\label{sec_correct_misperceptions}

	\begin{figure}[!htb]
		
		\centering
		\caption{Evidence of misperceptions: predictions within a group of 3 (pairs involving transgender workers only)}
			\includegraphics[width=0.6\linewidth]{../../outputs/figs/group_predic.pdf}
		\label{fig_group_predic}
		\begin{tablenotes}
			\footnotesize
			\item  \textit{Notes}: Sample includes all participants in the \textit{3-person discussion} arm and the \textit{No discussion (private)} arm, in both phases.  Unit of observation is participant $\times$ prediction. Only choices that include a transgender photo are included. Hollow bars represent the probability that a participant predicts that their group-member selects a transgender delivery worker. The prediction was incentivized. Each participant made 2 predictions (one involving a transgender worker) for each of their 2 group members. The two predictions involving a transgender worker are included for analysis. Filled bars represent the actual probability that participants select a transgender worker in the outcome round (restricting to only choices for which another group member made a prediction).
		\end{tablenotes}
	\end{figure}


%\textcolor{red}{\textbf{EXPLAIN METHODOLOGY}}
%\textcolor{red}{Make sure I reference to Appendix \ref{sec_data} somewhere for design of intermediate outcomes}

%\textcolor{red}{\textit{Beliefs about others' choices (group).} Participants then made incentivized predictions of the private hiring choices of the other two people \textit{in their group}. For each of the other two group members, they were asked to predict which option the other person chose for two pairs of delivery options.
% \footnote{If they correctly guessed all 4 combinations they were entered into a second lottery to win a separate prize, also worth 3000 Rs. When participants were making their main hiring choices, they did not know that their neighbors would later be paid for predicting their answers. This rules out concerns that they tried to make their hiring choices more predictable in order to help out their neighbors.}
%   The discussion caused a large increase in the predicted probability that a fellow group-member selects a transgender worker (\autoref{tab_attitudes_beliefs_norms}, panel B, column 2; \input{../../outputs/stats/effect_norms_group.tex} p.p., $p$\input{../../outputs/stats/p_val_group_norms.tex}).
%
%\textit{A priori}, the discussion effects could be driven by the process of correcting a misperceived norm.
 If participants initially overestimate how discriminatory their peers are, and if horizontal communication corrects that misperception,
%If participants in the group discussion share information and opinions,
%\footnote{In line with the claim that participants are sharing information about each other's preferences, participants' predictions about others in their group also become more \textit{accurate} (\autoref{fig_group_predic_accuracy}).}
% they may realize that people in their group are less discriminatory than they previously thought. 
  participants might feel more comfortable selecting a transgender worker after the discussion has finished.\footnote{
 This idea is motivated by evidence in other contexts showing that correcting misperceptions about discriminatory norms can reduce anti-minority behavior \citep{bursztynMisperceivedSocialNorms2020}. 
Alternatively, if participants initially \textit{underestimated} how discriminatory their peers were, and this misperception was not corrected in the discussion, they may have faced social pressure to discriminate less in the group discussion. \autoref{fig_group_predic} shows that this does not fit the data.}
\autoref{fig_group_predic} examines this hypothesis by displaying participants'  \textit{within-group} predictions about others' private choices, and comparing them to the true probability of selecting a transgender worker. Participants made incentivized predictions of the private hiring choices of the other two people \textit{in their group}. For each of the other two group members, they were asked to predict which option the other person chose for two pairs of delivery options (see Appendix \ref{sec_data} for more detail).












In line with the proposed channel, control participants underestimate the probability that their group members select a transgender worker by \input{../../outputs/stats/misper_diff_control.tex} p.p. ($p$\input{../../outputs/stats/pval_misper_control.tex}), suggesting an initial overestimation of discrimination. However, a corrected misperception is not sufficient to explain the discussion's effects. 
%
%
%
%
%
%
%% By contrast, participants who have been in a group discussion have a slightly \textit{overestimate} pro-trans choices by \input{../../outputs/stats/misper_diff_treat.tex} p.p., $p$=\input{../../outputs/stats/pval_misper_treat.tex}).
%
%
%
%
%%participants' behavior is partly driven by their perceptions of the prevailing social norm, the correction of the misperception could be part of what generates the reduction in discrimination in the treatment group. 
%If participants share information and opinion in the group discussion, and in-so-doing realize that others are less discriminatory than they previously thought, then the discussion will reduce discrimination.
While the discussion does stop participants overestimating discrimination, it also generates a large level-shift of roughly 20 p.p. in \textit{both} the predictions and actual choices: people discriminate significantly less than would be the case if the control group's misperceptions were simply corrected.\footnote{%
A second piece of evidence against the misperception channel is based on  \textit{No discussion (public)} participants, who were told the \textit{public} choices of others in their group before making predictions about \textit{private} choices. They also had their misperceptions corrected (\autoref{fig_group_predic_phase2}, estimate of misperception: $\input{../../outputs/stats/misper_public.tex}$ p.p., $p$=\input{../../outputs/stats/misper_public_pval.tex}), but the effect on discrimination in this arm was much smaller than the effect of the discussion (\autoref{tab_mech_r1_r2}).
}  The control-group misperception was \input{../../outputs/stats/misper_diff_control.tex} p.p., and the total change in beliefs was \input{../../outputs/stats/effect_norms_group.tex} p.p. (\autoref{tab_attitudes_beliefs_norms}, panel B, column 2). Thus, even under the very generous assumption that the post-discussion effect on discrimination was \textit{solely} driven by changes in second-order beliefs, correcting the misperception would only account for \input{../../outputs/stats/misperc_accounting.tex} (bootstrap 95\% CI: [\input{../../outputs/stats/misper_accounting_lower.tex}, \input{../../outputs/stats/misper_accounting_upper.tex}]) of the discussion's treatment effect.\footnote{This back-of-the-envelope calculation also assumes that second-order beliefs translate \textit{linearly} to post-discussion discrimination, and that beliefs correct precisely and do not overshoot.} Thus, although correcting a misperceived norm might contribute to the discussion's impact, it is unlikely to account for the whole effect.\footnote{
Participants also made incentivized predictions about the choices of others in the study whom they did not know. The predicted probability of selecting a transgender worker increased by a modest \input{../../outputs/stats/effect_norms.tex} p.p. (\input{../../outputs/stats/effect_norms_perc.tex}) in the discussion arm (\autoref{tab_attitudes_beliefs_norms}, panel B, column 1). However, these do not appear to mediate the effects on discrimination: controlling for within-group norms significantly attenuates the effect on discrimination, while controlling for community-wide norms does not (\autoref{tab_mediation}).
}






%\textcolor{red}{FOOTNOTE ON COMMUNITY NORMS}

%\textit{Beliefs about others' choices (community).} First, participants made incentivized predictions about the choices of others in the study whom they did not know. They were shown 3 pairs of delivery options, and truthfully told that 20 other people in the study had been shown those pairs. They had to predict how many of those 20 picked each option.
%\footnote{
%If they made the closest guess on average across all 3 pairs, they were entered into a lottery to win 3000 Rs.' worth of additional items.
%} 
%For the one pair that compared a male and a transgender, the predicted probability of selecting the transgender worker increased by a modest \input{../../outputs/stats/effect_norms.tex} p.p. (\input{../../outputs/stats/effect_norms_perc.tex}) in the discussion arm (\autoref{tab_attitudes_beliefs_norms}, panel B, column 1).


%The larger magnitude of the update in predictions \textit{within} the group suggests that the effect of the discussion is likely to be mediated by a shift in group-level norms. %\footnote{However, because the elicitation methods vary across the measures, comparisons of the magnitudes of the treatment effects should be interpreted with caution.} 
%In line with the claim that effects are mediated by group norms, controlling for group norms significantly attenuates the effect on discrimination, while controlling for community-wide norms does not (\autoref{tab_mediation}).






%Any misperceptions about the level of discrimination in one's group could explain the effects of the discussion in two ways. 






%Participants in the control group actually \textit{underestimate} the prevalence of pro-trans behavior in their group by \input{../../outputs/stats/misper_diff_control.tex} percentage points ($p$\input{../../outputs/stats/pval_misper_control.tex}). {\color{red}(CONFUSING) And for participants who predict after taking part in a discussion, this misperception has been corrected (indeed, participants even slightly overestimate how many others select transgender workers after the discussion} ($p$=\input{../../outputs/stats/pval_misper_treat.tex})).



%However, the correction of the misperception \textit{alone} does not appear to be sufficient to generate the large effects of the discussion seen on hiring choices, for two reasons.

%First, \autoref{fig_group_predic} clearly shows a large level shift in the predictions and actual choices. Participants' actual choices in the \textit{3-person discussion} arm increase from around 40\% to about 60\% -- a jump that is correctly estimated by participants. Thus, to account for the discussion's effects, a model needs to describe how a whole group can be persuaded, instead of just describing a corrected misperception.

%Second, participants in the \textit{No discussion (public)} condition also have their misperceptions corrected (\autoref{fig_group_predic_phase2}). After being told the treatment round choices of others in their group, participants correctly estimate that their fellow group members select a transgender around 40\% of the time in the outcome round (estimate of misperception: $\input{../../outputs/stats/misper_public.tex}$ p.p., $p$=\input{../../outputs/stats/misper_public_pval.tex}). But since the \textit{No discussion (public)} arm had much smaller effects on participants' hiring choices than the group discussion, correcting a misperception cannot be sufficient to explain the effects of the discussion.

%Nevertheless, the changes in beliefs about others in their group and the correction of the misperception could be partial drivers of the discussion effect when embedded in a dynamic model of the discussion that allows for persuasion.



\subsection{Virtue signaling}

The virtue signaling channel proposes that participants have social image concerns, and so in group settings take pro-trans actions in order to not appear discriminatory \citep{benabouIncentivesProsocialBehavior2006, dellavignaTestingAltruismSocial2012, bursztynSocialImageEconomic2017a}. These pro-trans behaviors may persuade others to be less discriminatory after the discussion has ended.

%If participants generate  may therefore generate pro-trans communication that creates a pro-trans norm in the group and persuades others to be less discriminatory after the discussion has finished. 

%{\color{red}add strategic cites on social image concerns papers}
Using the \textit{No discussion (public)} arm, I test for virtue signaling by examining whether social image concerns alone can promote pro-trans choices in a group. Participants in this arm knew others would see their choices in the treatment round, but did not discuss those choices. If virtue signaling were driving behavior, we would therefore expect more pro-trans choices in this public setting.

Empirically, virtue signaling alone does not appear to be sufficient to explain the effects of the discussion. The \textit{No discussion (public)} treatment did \textit{not} make participants choose transgender workers more often in the treatment round on average (\autoref{tab_mech_r1_r2} column 1, $p$=\input{../../outputs/stats/p_val_r1_public.tex}), implying that exogenously increasing social image concerns does not discourage participants from discriminating.\footnote{%
This was not because the treatment had no effect on behavior: participants within a group converged in their likelihood of selecting a transgender compared to the control group ($p$=\input{../../outputs/stats/ri_p_icc_private_public.tex}), suggesting that when choices were visible, participants tended to match the behavior of their group members (\autoref{tab_icc}).
Alternatively, one might think that pro-trans \textit{communication} is a stronger virtue signal than pro-trans choices (for example, because there is more plausible deniability when making choices without having to verbally explain them). 
%If pro-trans \textit{communication} in the discussion is a costlier (stronger) virtue signal than pro-trans \textit{choices}, the result here cannot rule out virtue signaling during discussions. 
However, this hypothesis seems to be ruled out by the data. The \textit{No discussion (public)} arm has no effect even when there is little plausible deniability for participants, namely, when transgender workers offer more items ($\beta$=\input{../../outputs/stats/coeff_public_dominates.tex}, $p$=\input{../../outputs/stats/p_val_public_dominates.tex}). I also cannot rule out that virtue signaling \textit{in combination} with other mechanisms contributes to the discussion's effects, e.g., if pro-trans participants are initially more vocal for other reasons and subsequently induce other participants to virtue signal.
} There were also only small or null effects on the \textit{outcome} round (\autoref{tab_mech_r1_r2}, column 2).%\footnote{\textcolor{red}{\textbf{UPDATE BASED ON NEW p-VALUES}. No significant effect is seen for \textit{non-observers} ($p\in[\input{../../outputs/stats/p_vals_public_non_observer.tex}]$), who were not told the public choices of others in their group before making their private outcome round choices. \textit{Observers}, who were told the public choices of others in their group in advance, were around \input{../../outputs/stats/effect_public_observer.tex} p.p. more likely to select transgender workers in the private outcome round ($p\in[\input{../../outputs/stats/p_vals_public_observer.tex}]$).}}
%The effect of observing others' public choices is significantly smaller than the effect of listening or taking part in a discussion. 
%Taken together, these results suggest that the discussion's effects are not driven by a pre-existing norm that leads to virtue signaling in favor of transgender people in group settings: instead, the discussion \textit{creates} a such a pro-trans norm.%\footnote{In addition, the narratives and justifications surfaced in a discussion may be inherently more persuasive than simply being told the choices of one's group members, in line with evidence that ``stories'' are more memorable than statistics \citep{graeberStoriesStatisticsMemory2022}.}
%Taken together, these results suggest that the discussion's effects cannot be explained solely by social image concerns that lead participants to discriminate less in a group setting. 
%The evidence presented above shows that a strong pro-transgender norm is likely to be a key driver of the discussion's effects, and that the communication led people to discriminate less later on. 
%
%
%
%I now examine whether that pro-transgender norm exists even in the absence of a discussion, or whether the communication during the discussion was key for generating this norm. To do this, I use the \textit{No discussion (public)} arm, which aims to isolate the effect of social image concerns on behavior in the absence of a discussion.
%
%
%
%\footnote{The experimental design may inflate the role of social image concerns in people’s decision-making due to a ``cheap act effect'' \citep{benabouElicitingMoralPreferences2020}. Participants make 10 choices, one of which is randomly selected to be implemented. This means that the utility cost of choosing a transgender worker is divided by 10, while the social image effects of doing so may not be. Given the \textit{No discussion (public)} arm has no average effect on behavior, it is unlikely that this concern can explain the large effects of the discussion.}
%
%
% In the treatment round, this arm made choices that they knew would be revealed to others, but they didn't discuss those choices. If a pro-trans norm existed in the absence of a discussion, participants would choose transgender workers more often in this public condition than participants who chose privately. 
%
%I now examine whether social image concerns could underly the effects of the discussion. If people do not want to \textit{look} discriminatory in front of others, then they may choose transgender workers more often when others can see their choices. I test this idea with the \textit{No discussion (public)} participants. In the treatment round, these participants made choices that they knew would be revealed to others, but they didn't discuss those choices.






%\footnote{This is true even though the public nature of their choices did lead participants to change their behavior. 
%
%Indeed, Appendix \autoref{tab_icc} shows that despite not shifting the mean level of discrimination, the \textit{No discussion (public)} treatment did have an effect on behavior. 
%
%Within a group, there is a greater correlation of whether each member selects a transgender in the treatment round in the \textit{public} arm as compared to the \textit{private} arm, as measured by the intra-cluster correlation coefficient ($p = $\input{../../outputs/stats/ri_p_icc_private_public.tex}).\footnote{All participants in a group are always shown the same delivery options in the \textit{treatment round}, regardless of their treatment status, making this comparison valid.}  The visibility of their choices led participants to match their behavior with their group-members.}










\subsection{Persuasion}

A third channel that could explain the effects of the discussion is that (i) people persuade each other to change their behavior with the narratives and justifications they share during the discussion, and that (ii) persuasive communication is predominantly in favor of transgender workers. 

\subsubsection{Effect of listening to discussion}

To test whether participants were persuaded by what they heard in the discussion, I examine the effects on the \textit{listener} in the \textit{2-person discussion arm}, who silently listened to two other participants taking part in a discussion. Listening led to large and significant reductions in subsequent private discrimination (\input{../../outputs/stats/effect_listener.tex} p.p., $p$$<$0.001, \autoref{tab_mech_r1_r2}). This effect is not significantly different from the effect of speaking in either the 2-person discussion ($p$=\input{../../outputs/stats/p_val_listener_speaker.tex}) or the 3-person discussion ($p$=\input{../../outputs/stats/p_val_listener_full_discussion.tex}). Since the listener was silent, they changed their private behavior solely based on being persuaded by the choices and justifications they heard from others in the discussion. The effect of the discussion therefore does not operate through self-persuasion or self-consistency channels, where active participation in the discussion is crucial for generating reductions in discrimination \citep{falkConsistencySignalSkills2017, schwardmannSelfPersuasionEvidenceField2022}. 

The implied \textit{persuasion rate} is large: \input{../../outputs/stats/persuasion_rate.tex} of listeners who would not otherwise have chosen transgender workers change their behavior. This is at the high end of the values seen in the literature on persuasion \citep{dellavignaPersuasionEmpiricalEvidence2010} \textcolor{red}{but note that it is plausible, in line with persuasion effects of in person things}. Moreover, the effects on listeners are persistent and apply when they are behaving in total privacy, providing further evidence in favor of persuasion. In the individual follow-up (when group members are very unlikely to be present) they are still \input{../../outputs/stats/coeff_listener_fu.tex} p.p. more likely to select a transgender worker  ($p$\input{../../outputs/stats/p_val_listener_fu.tex}, \autoref{tab_mechs_follow_up}). The discussion also reduces listeners' discrimination even on a short-run outcome that is completely private (i.e., unobservable by neighbors) (see Section \ref{sec_alt_mechanisms} and \autoref{tab_anon}).




\begin{table}[!htbp]
	\caption{Effect of mechanism treatments}
	\label{tab_mech_r1_r2}
	\centering
	\resizebox{0.85\textwidth}{!}{
		\input{../../outputs/tables/mechs_combine_r1_r2.tex}
	}
	\begin{tablenotes}
%		\item \footnotesize	\textit{Notes}: * p $<$ 0.1, ** p $<$ 0.05, *** p $<$ 0.01. 
%		\item Controls include items and quality stuff
%		\item 
		
		\item	\scriptsize \textit{Notes}: 
		* p $<$ 0.1, ** p $<$ 0.05, *** p $<$ 0.01. Standard errors are clustered at the group-of-3 level and are in parentheses.
Randomization inference p-values are in brackets. 
Sample includes all treatment arms in phase 2 of data collection. In column (1), the \textit{Listeners} are excluded (since they only make choices after they have observed the discussion).
The specification used is otherwise the same as Tables \ref{tab_main} and \ref{tab_videos}, columns (3).
\textit{No discussion (public), pooled} includes both \textit{Observers} and \textit{Non-observers}, since both these types are treated the same until after the treatment round.
	\end{tablenotes}
\end{table}



\subsubsection{Other suggestive evidence for persuasion mechanism}



\textbf{Attitudes}. The results on measures of attitudes help inform the nature of the persuasion that occurs in the discussion. I find that self-reported disapproval of discrimination is high, even in the control group. Participants were presented with two scenarios depicting instances of discrimination against transgender individuals and were asked whether the discriminator's actions were acceptable or wrong. The probability of saying that discrimination was wrong was already high in the control group (\input{../../outputs/stats/prop_attitude_control.tex}), and increased slightly in the discussion arm (\input{../../outputs/stats/prop_attitude_treat.tex}, $p$ of difference: \input{../../outputs/stats/attitude_p_val.tex}, effect size: \input{../../outputs/stats/attitude_effect_sd.tex} SD).\footnote{I also examine a double list experiment \citep{droitcour2004item, glynnWhatCanWe2013} that preserves anonymity of responses. It measured the proportion of people who agreed with the statement ``In general, if I see a transgender person, I walk away.'' (see Appendix \ref{sec_list_exp}). 20\% of the control group agreed with the anti-trans statement, and the discussion did not have a significant effect on the proportion of people who agreed with the sensitive anti-transgender statement in the list experiment (\autoref{tab_attitudes_beliefs_norms}, panel A, column 1).}
%Since the list experiment did not allow the enumerator or the researcher to infer \textit{which} statements the participant agrees with from the list of statements, it preserved the anonymity of their responses and so was less likely to be vulnerable to social desirability concerns than standard self-reported attitude questions. \textcolor{red}{COMMENT ON LEVELS??}.}
Given this small difference, it seems more likely that participants are predominantly being persuaded by pro-trans participants to \textit{act in accordance with their moral beliefs} by not discriminating, rather than being persuaded to change their broader attitudes towards transgender workers. For example, they might be persuaded that not selecting a transgender worker is a form of discrimination that violates their belief in equality. This mechanism is in line with evidence from psychology that suggests that persuasion is particularly effective when it increases the coherence of our beliefs (see e.g., \citealp{mercier2020not}, Ch. 4).

%I find only small or null effects of the discussion on measures of broader attitudes towards transgender people. 

%The first measure of attitudes was self-reported disapproval of discriminatory vignettes. Participants were presented with two scenarios depicting instances of discrimination against transgender individuals and were asked to evaluate whether the discriminator's actions were acceptable or wrong. 
%There was a small improvement in attitudes (\autoref{tab_attitudes_beliefs_norms}, panel A, column 2). The probability of saying that discrimination was wrong increased from an already high level in the control group (\input{../../outputs/stats/prop_attitude_control.tex}) to a slightly higher value (\input{../../outputs/stats/prop_attitude_treat.tex}, $p$ of difference: \input{../../outputs/stats/attitude_p_val.tex}, effect size: \input{../../outputs/stats/attitude_effect_sd.tex} SD). Overall, the effect of the discussion does not seem to be driven by changes in very broad-based attitudes towards transgender individuals.


%The first measure of private attitudes I examine is a double list experiment \citep{droitcour2004item, glynnWhatCanWe2013}. It measured the proportion of people who agreed with the statement ``In general, if I see a transgender person, I walk away.'' (see Appendix \ref{sec_list_exp} for more detail). Since the list experiment did not allow the enumerator or the researcher to infer \textit{which} statements the participant agrees with from the list of statements, it preserved the anonymity of their responses and so was less likely to be vulnerable to social desirability concerns than standard self-reported attitude questions. The discussion did not have a significant effect on the proportion of people who agreed with the sensitive anti-transgender statement in the list experiment (\autoref{tab_attitudes_beliefs_norms}, panel A, column 1). 







%\textcolor{red}{
%\textbf{COMMENT - suggests that people are being persuaded to change their behaviour to be in line with their beliefs (e.g., in equality), not necessarily changing attitudes, cite Hugo Mercier -- already have some baseline attitudes...(?) already somewhat favourable... asymmetric persuasion}
%}
%\textbf{Attitudes.} There were small or null effects of the discussion on measures of broader attitudes towards transgender people. 
%The first measure of private attitudes I examine is a double list experiment \citep{droitcour2004item, glynnWhatCanWe2013}. It measured the proportion of people who agreed with the statement ``In general, if I see a transgender person, I walk away.'' (see Appendix \ref{sec_list_exp} for more detail). Since the list experiment did not allow the enumerator or the researcher to infer \textit{which} statements the participant agrees with from the list of statements, it preserved the anonymity of their responses and so was less likely to be vulnerable to social desirability concerns than standard self-reported attitude questions. The discussion did not have a significant effect on the proportion of people who agreed with the sensitive anti-transgender statement in the list experiment (\autoref{tab_attitudes_beliefs_norms}, panel A, column 1). 
%
%The second measure of attitudes was self-reported disapproval of discriminatory vignettes. Participants were presented with two scenarios depicting instances of discrimination against transgender individuals and were asked to evaluate whether the discriminator's actions were acceptable or wrong. 
%There was a small improvement in attitudes (\autoref{tab_attitudes_beliefs_norms}, panel A, column 2). The probability of saying that discrimination was wrong increased from an already high level in the control group (\input{../../outputs/stats/prop_attitude_control.tex}) to a slightly higher value (\input{../../outputs/stats/prop_attitude_treat.tex}, $p$ of difference: \input{../../outputs/stats/attitude_p_val.tex}, effect size: \input{../../outputs/stats/attitude_effect_sd.tex} SD). Overall, the effect of the discussion does not seem to be driven by changes in very broad-based attitudes towards transgender individuals.


%However,  positive correlation between baseline and post-discussion attitudes is plausible as it simply requires that initially pro-trans discussants are not \textit{so} persuasive as to make other participants even more pro-trans than them.




%(Monotonicity)



%\subsection{Model}
%
%\textcolor{red}{Discuss model here}
%
%\textcolor{red}{TALK ABOUT ACTIVISTS... Discuss how model matches the fact that around 54\% of control group have at least pro-trans person, and about 50\% of discussions are pro-trans}
%
%\textcolor{red}{Discuss how it reduces to a social signalling model if alpha is small..., opens up new equilibria}
%
%\textcolor{red}{"If attitudes are too discriminatory, no one will be pro-trans; if attitudes are too pro-trans, then pro-trans people don't have an incentive to speak up"}






%Examining the effects on the \textit{outcome} round, 








\end{comment}



\begin{comment}
\section{Additional results (to comment)}


Additional results
\begin{itemize}
  \item Results by gender - \autoref{fig_gender}
  \item Results by gender of worker and by gender of participant \autoref{fig_gender_by_gender}
  \item Longer term follow up - \autoref{tab_long_term}
  \item Positive vs negative discrimination \autoref{fig_dominated_status}
  \item \autoref{fig_trans_cdf} - CDF of number of people in a group that select trans - FOSD so indicates everyone is shifted to be more pro-trans, rather than e.g., increase in polarisation
  \item \autoref{tab_het_demo} - very little heterogeneity by demographic characteristics
  \item \autoref{tab_het_group} - closer groups persuade each other more?
  \item \autoref{fig_wtp_to_avoid} - inferring WTP to avoid transgender people
\end{itemize}

Mechanisms:
\begin{itemize}
  \item "Deliberation" - people take longer in discussions, and then also take a bit longer in outcome-round choices if they have been in a discussion (so slowing down might be part of the effect) \autoref{fig_durations}
  \begin{itemize}
  \item \autoref{fig_mem_check} - people remember discussion choices better than their own earlier choices and announcements, so there may be a role for deliberation / attention. Link to lit on narratives/stories being more memorable (Roth et al)
  \item \autoref{fig_mem_check_trans_non_trans} - not much difference between memory of trans vs non-trans though
\end{itemize}

  \item Group predictions including phase 2 \autoref{fig_group_predic_phase2}. Revealing others' choices doesn't really increase SOBs very much (in public / announce treatment condition)
  \item Within-group correlation \autoref{tab_icc} : people's choices are significantly more correlated within a group in the social image condition. Implies that social image is doing something (making groups converge) even if mean effect is quite small.
\end{itemize}


Discussion results
\begin{itemize}
\item \autoref{tab_r1_phase_2} - R1 choices in phase 2 - shows 0 social image effect in R1, and figure \autoref{fig_r1_phase2}
  \item Evidence for persuasion?
  \begin{itemize}
\item \autoref{tab_r2_heterogeneity_by_discussions} - other people's pro-trans behavior in discussion is strongly predictive of R2 choices, even conditional on own behavior, suggesting some kind of persuasion
\item \autoref{fig_het_by_positive_discussions} - asymmetric persuasion? Even with very small amount of positive discussion, there is a positive treatment effect
\item \autoref{tab_effect_of_announce} - heterogeneity - being told that others in your group chose trans is highly correlated with choosing transgender people - even if mean effect is small / 0 in announcement condition. RI p-value on the mean predicted effect of \textit{No discussion, public, observer} when evaluated at $ P(\text{selected trans in treatment round}) = 0.5 $ is p=\input{../../outputs/stats/effect_of_announce_p05}.
\item \autoref{tab_r1_r2_reasons_corr} strong correlation between reasons cited in discussion and in R2 (this could be anything though - not necessarily persuasion)
\end{itemize}
\item People discuss pairs with transgender people more (and 3-person discussions generate more discussions than 2-person discussions) \autoref{tab_amount_discussed}
\item \autoref{fig_narratives_heard} - heterogeneity with respect to which narratives were cited in discussion, shows that people explicitly talking about transgender people is correlated with choosing them; talking about other stuff is not
\item \autoref{fig_reasons_for_choices_r2} - comparing reasons for choices in R2 for control vs discussion - pro-social reasons are more likely to be cited, less likely to cite e.g., items
\end{itemize}



Robustness:
\begin{itemize}
  \item SDB robustness \autoref{tab_sdb_robustness}
  \item Logit model \autoref{tab_logit}
  \item Audio refused \autoref{tab_audio_robustness}
  \item Control video only \autoref{tab_control_vid_only}
\end{itemize}

Balance:
\begin{itemize}
  \item \autoref{tab_balance_videos} - videos
  \item \autoref{tab_balance_pooled} - 3-person discussion, phase1 and 2
  \item \autoref{tab_balance_phase2} - phase 2
\end{itemize}


Videos:
\begin{itemize}
\item \autoref{tab_videos} Overall effects of videos - both have positive effects
	\item All video / discussion arm pairs \autoref{fig_bar_vid_discussion_interact} and \autoref{tab_video_interactions}
%	\item Focused table, looking at whether  \autoref{tab_video_comp_sub}
\item \autoref{tab_law_checks} - Law video does lead to change in beliefs about the law
\item Mechanisms
\begin{itemize}
  \item Attitudes/beliefs \autoref{tab_videos_attitudes} - videos don't affect attitudes, but law video affects beliefs about whether they will do the delivery
  \item Norms \autoref{tab_videos_norms} - law has small effect on norms, messaging video doesn't
\end{itemize}
\item Confounders \autoref{tab_video_confounders}, \autoref{tab_video_confounder_interact} - video has effect on perceived purpose. Not powered to detect interaction effects.. Effects are killed when interacting with SDB, so not clear
\item Discussion narratives - \autoref{fig_reasons_r1_by_video} - law video increases pro-social discussion more
\end{itemize}



Arguments against confounders

\begin{itemize}
  \item No order effects - so probably not due to making it more interesting, less boredom, or the gap between treatment/outcome round (\autoref{fig_order_effects})
  \item Perceived purpose of the experiment - looking at all answers, few people guess correctly, lots of people say market research / measuring preferences for deliveries, etc. \autoref{fig_perceived_purpose}
  \item \autoref{tab_item_sensitivity} - item sensitivity does not change in discussion vs no-discussion. Implies that there's not something weird going on where when making group choices they are more likely to prefer the bundle of items (Liam's concern)
\end{itemize}


\end{comment}






%	\begin{table}[htbp]
%	\centering
%	\caption{Mechanism outcomes: attitudes, beliefs, and beliefs about others}
%	\flushleft
%\footnotesize \textbf{Panel A:} Attitudes and beliefs	
%%\vspace{0.25em}
%	\normalsize
%		\begin{subtable}[t]{1\textwidth}
%\label{tab_attitudes}
%\resizebox{\textwidth}{!}{
%\input{../../outputs/tables/attitudes.tex}
%}
%		\end{subtable}
%	\vspace{1em}
%		\flushleft
%\footnotesize \textbf{Panel B:} Beliefs about others
%	\vspace{0.25em}
%	\normalsize
%		\begin{subtable}[t]{1\textwidth}
%\resizebox{\textwidth}{!}{
%\input{../../outputs/tables/norms.tex}
%}
%		\end{subtable}
%	\footnotesize
%%\flushright \textit{Continued on next page...}
%\end{table}
	
	


	
	
	
		
	
	
	



	
	
	
%		\begin{figure}[!htbp]
%		
%		\centering
%		\caption{Probability of selecting the alternative worker in the treatment round}
%			\includegraphics[width=0.7\linewidth]{../../outputs/figs/r1_bar.pdf}
%		\label{fig_r1}
%		\begin{tablenotes}
%			\footnotesize
%			\item  \textit{Notes}: The outcome is whether the participants selected the alternative worker in the treatment round of their hiring choices.
%		\end{tablenotes}
%	\end{figure}
%	
	
	
	
%\section{}










	
\section{Discussion}

%\textcolor{red}{ADD DISCUSSION OF WHEN HORIZONTAL COMMUNICATION WILL HELP AND WHEN NOT -- give me ideas}

%\textcolor{red}{Objective: ``come away with a clear understanding of the types of settings where horizontal communication is likely to reduce discrimination, and those where it might exacerbate it''}

%Involving majority-group members in a group discussion and hiring decision sharply reduced discrimination against transgender people in a real-stakes hiring decision. Even though the discussion I evaluate lasts only 10 minutes, it also has impacts on medium-run choices. The results appear to be driven by pro-trans participants' disproportionate willingness to speak up, which leads to a large reduction in participants' perception of how discriminatory the norm is towards transgender workers, and thus reduces how much they discriminate in private after the discussion.

A group discussion about hiring transgender workers sharply reduced discrimination in subsequent real-stakes hiring choices. Despite lasting only 10 minutes, the discussion also decreased discriminatory choices several weeks later. I show experimental evidence that the impact is driven by inter-personal persuasion, and suggestive evidence based on correlations that it stems from pro-transgender participants speaking up more frequently during discussions, leading others to perceive social norms as less discriminatory and consequently reduce their own private discrimination after the discussion ended. These results are a proof-of-concept that horizontal communication can lead to large reductions in discrimination. Progressive social change can thus occur not only by generating social contact between in-groups and out-groups, but also by encouraging communication among the in-group in the right circumstances.


%These insights should be added to our understanding of how social change can occur, going beyond the current focus on social contact between in-groups and out-groups.


%I also show that top-down communication about the legal rights of a minority can significantly reduce discrimination in the short-run, although the effects are substantially smaller.

Why did horizontal communication sharply reduce discrimination in this setting? The question arises because such communication can also amplify prejudice, as in 1930s Germany, when anti-Semitic sentiment spread through social networks \citep{satyanathBowlingFascismSocial2017}, or as in psychological studies examining group dynamics \citep{myersDiscussionEffectsRacial1970}. The norm-based persuasion mechanism I show evidence for is directionally neutral -- it could either reduce or increase discrimination depending on which voices dominate the discussions. The central puzzle is thus why pro-trans participants were so vocal in this context.

The contextual factor that appears to be key is a striking disconnect: while discrimination is strong and common in the absence of communication, there is nevertheless widespread agreement that discrimination is wrong (e.g., 93\% of control participants say it is wrong). 
This contrasts with many settings where discrimination is not only common but also widely accepted or even morally encouraged. This gap between behavior (descriptive norms) and moral beliefs (prescriptive norms) could explain four key patterns that could drive the positive effects: (i) the substantial minority of pro-trans participants; (ii) the minimal social costs of advocating against discrimination, unlike in contexts where discrimination is both practiced and accepted; (iii) pro-trans participants' greater propensity to try to influence others, consistent with them viewing discrimination as a moral wrong rather than a personal choice, and thus caring more about others' actions; and (iv) the particular prevalence and influence of pro-trans moral arguments about equal rights and fairness that align with participants' stated values (unlike in Nazi Germany, where moral narratives focused on impurity were used to \textit{encourage} discrimination). All of these factors would, in turn, contribute to pro-trans participants' disproportionate willingness to speak up, and thus plausibly explain the reductions in post-discussion discrimination. They could also explain why the descriptive norm seems to be so responsive to the discussion. Future research could valuably explore the boundary conditions under which horizontal communication is helpful in other contexts. A promising starting point would be to identify, for example using attitude surveys, other contexts with a wedge between the level of discrimination (what people do) and the acceptance of discrimination (what people say is right).


%\textcolor{red}{ADD SOMETHING ON HAVING LOTS OF VARIATION WITHIN A SOCIAL NETWORK AS WELL AS ANOTHER CONTEXTUAL FACTOR}

%\textcolor{red}{INCLUDE A LITTLE THING ON LIMITAITONS HERE??? a specific paragraph}


The study has several important limitations. First, the main effects are only measured shortly after the discussions, which may explain the large coefficients, and the medium-run effects are only based on hypothetical choices. The post-discussion decisions are also of the same type as those made during the discussions. This raises the concern that some of the effects may be driven by participants' desire to maintain consistency (although the large effects on silent listeners alleviate this concern, since they did not make choices in the discussion). Second, I do not elicit baseline measures of discrimination. While this was done deliberately to minimize the risk that participants guessed the purpose of the experiment and reduce experimenter demand effects, it limits the ability to conclusively understand which types of participants speak up and how group composition relates to persuasion. Third, the communication involved in the discussion is not entirely natural: the presence of an enumerator facilitating the discussion may influence participants' behavior, and participants make incentivized choices at the same time as communicating. Although facilitator influence may also exist for the control group, and the intervention resembles a policy-style intervention, the results may not map as clearly onto the effects of organic horizontal communication in society absent an intervention. Fourth, while the evidence in favor of interpersonal persuasion is based on experimental variation, the evidence on other mechanisms related to norm updating and asymmetries is only suggestive and primarily based on consistent patterns of correlations. Future research could explore participants' decisions to speak up in more organic scenarios (e.g., with no facilitator), and use exogenous variation in group composition and baseline attitude measures to better understand mechanisms.

Future research could also explore how to design policies based on the insight that in-group communication can reduce discrimination under the right circumstances. Policies that create discussions at scale to change attitudes are particularly promising (e.g., see \citealp{broockmanDurablyReducingTransphobia2016, kallaReducingExclusionaryAttitudes2020, dharReshapingAdolescentsGender2022a}). The role of vocal pro-trans participants also suggests it may be possible to amplify the voice of pro-social actors within a network by identifying them and training them to influence others' behavior. This approach underlies the strategy followed by activist NGOs, and has also shown promise when trying to reduce the stigma surrounding menstruation \citep{macours2024menstrual}. Finally, we could use groups rather than individuals to make high-stakes decisions in order to reduce discrimination in cases where a pro-minority prescriptive norm is in place.



%It will also be important to investigate what the most promising avenues for building policies based on the insight that group communication can reduce discrimination. 
%First, future research could design and evaluate policies that create discussions at scale to change attitudes towards minorities. This builds on previous work showing that one can change discriminatory attitudes by running discussion-based interventions in schools \citep{dharReshapingAdolescentsGender2022a}, or by door-to-door canvassing \citep{kallaReducingExclusionaryAttitudes2020, broockmanDurablyReducingTransphobia2016}. An important caveat to my results is that the short 10-minute discussion in my study generates relatively small medium-run impacts on discrimination. Policies likely require more intensive and repeated interventions to have larger and longer-run effects.


%Second, the importance of persuasive ``activists'' or ``positive deviants'' suggests a more directed policy approach. Instead of letting communication occur organically (which risks generating a negative effect if anti-minority participants happen to be more vocal), it may be possible to amplify the voice of pro-social actors within a network by identifying them and training them to change others' behavior. This approach has shown promise in addressing a different harmful norm---the taboo surrounding menstruation \citep{macours2024menstrual}.






%My results raise the possibility of reducing discrimination without even having to \textit{lead} a discussion; instead, just creating a scenario where minorities are naturally discussed at all may be sufficient in some contexts. One important caveat is that the short 10-minute discussion in my study only generates small medium-term impacts on discrimination. Policies likely require more intensive and repeated interventions to have larger and longer-run effects.
%
%Finally, my results suggest that under the right conditions, groups discriminate much less than individuals. This implies that in high-stakes decisions where discrimination might take place (in hiring, housing, college admissions, etc.), it may be especially important to design a decision environment that promotes fair decision-making. Future work could investigate how encouraging \textit{collective} hiring choices can reduce discrimination, and further explore the role of vocal advocates for equality, building on existing work that has examined the effect of different compositions of selection committees (e.g., \citealp{baguesCanGenderParity2010, baguesDoesGenderComposition2017}).




%First, advocating against discrimination carries minimal social costs (unlike defending Jewish people in Nazi Germany). Second, pro-trans participants have a greater desire to influence others, \textcolor{red}{likely} because they view discrimination as morally wrong rather than simply a personal preference \textcolor{red}{(A BIT CRYPTIC)}. Finally, moral arguments about fairness, equal rights, and giving opportunities because they 





%I show that horizontal communication leads to large reductions in discrimination in this setting. But several  historical episodes—such as the spread of anti-Semitic sentiment in 1930s Germany—demonstrate that horizontal discussions can also amplify rather than mitigate prejudice (Voth et al. 2022). Why, then, is horizontal communication so beneficial in this setting? It is important to note that norm-updating and the spiral of silence dynamic could either reduce or increase discrimination, depending on who is more vocal and influential in the discussions. The key question then becomes: why were pro-trans participants so vocal in my discussions?

%I suggest that the key contextual factor that ties together the results on the mechanisms is that while discrimination is on average strong and common in the absence of communication, there is nevertheless widespread agreement that discrimination is wrong (e.g., \textcolor{red}{93\%} of control participants say discrimination is wrong). In other words, there is a wedge between the descriptive norm of behavior towards transgender people (people \textit{do} discriminate) and the prescriptive norm (people believe that it is \textit{wrong} to discriminate). This of course contrasts with other contexts in which discrimination is simultaneously widely practiced and accepted. This factor can explain a number of patterns in the data: it explains why (i) there do not appear to be major social image costs to advocating against discrimination (in contrast, presumably, to advocating to defend Jewish people in Nazi society); (ii) why pro-trans people have a greater desire to influence others (since \textit{discriminating} is seen as an immoral action, whereas \textit{not discriminating} is simply a personal choice (\textcolor{red}{explain this more}); (iii) and why the moral frames of fair treatment, equal rights, and giving opportunities are so prevalent in the discussions and appear to be especially pervasive and influential (compared to societies where moral frames would actually play against minorities, e.g., using moral frames of impurity, that they are ruining the fabric of civilization, etc.). All of these factors potentially contribute to pro-trans participants' disproportionate vocality.











%-------

%\textcolor{red}{OLD VERSION}



%I find that discriminators persuade each other to discriminate \textit{less}, which contrasts to previous literature  documenting that discussions tend to reinforce preexisting prejudice (e.g., \citealp{myersDiscussionEffectsRacial1970}). Understanding \textit{why} there is such a large pro-minority shift in this context is therefore crucial. I document the important role of persuasion, driven in particular by pro-trans ``activists'' who speak up against discrimination using moral arguments. But given the specificity of my study on the transgender community in India, evaluating whether such mechanisms apply in other contexts would be valuable. For example, research on some other stigmatized groups --- such as those experiencing homelessness, poverty, or disability --- has shown that sympathy, pity, and guilt can motivate supportive actions, even while coexisting with stigmatizing attitudes (e.g., \citealp{odriscollPositivePrejudiceEthnic1985,iyerWhiteGuiltRacial2003, mallettSeeingTheirEyes2008, harthAdvantagedGroupEmotional2008, thomasTransformingApathyMovement2009,tsaiChangesPublicAttitudes2017, lantosPityEconomicallyDisadvantaged2020,  dullCanWhiteGuilt2021}). Horizontal communication could be particularly effective in these areas. Conversely, horizontal communication is likely to be less effective when anti-minority attitudes are very deep-set, or when people are being asked to engage in pro-minority actions that are significantly more costly. Further research could also examine the effects of horizontal communication outside the realm of discrimination, for example on political views or attitudes towards climate change. %In my context, while participants changed their willingness to interact with a transgender worker for 15 minutes, more intensive interventions would likely be needed to change participants' willingness to have a transgender neighbor, work with a transgender colleague for a year, become friends with a transgender person.





%The results raise a number of questions that are important avenues for future research.

%A key remaining uncertainty is the extent to which these results generalize to other contexts and other minorities. An important limitation of the study is the focus on the transgender community in India, and the concern that the specific social dynamics driving behavior towards that community do not generalize to other minorities. There are therefore several important research avenues that are important for understanding whether the mechanisms I examine are present in other contexts.

%First, when does pro-minority horizontal communication arise endogenously in other group settings? 
%Other research suggests that virtue signaling motives can discourage people from expressing anti-minority views \citep{bursztynJustifyingDissent2023, bursztynExtremeMainstreamErosion2020, braghieriPoliticalCorrectnessSocial2021}, providing one mechanism that may be common to other contexts. 
%I show evidence that horizontal communication can also be beneficial when pro-minority individuals speak up and persuade others to discriminate less, which can occur when attitudes are in a sweet spot -- discriminatory, but not too discriminatory. Despite economically important anti-transgender discrimination in the control group, this discrimination may be lightly held, and borne of unfamiliarity rather than deep animosity. This suggests that discussions will be most effective when there are many people on the margin of not discriminating, and when there are some people willing to advocate for a discriminated group. 





%Lightly held prejudice, born of unfamiliarity, difference, etc. - might be quite malleable
%Lots of discrimination, even based on lightly held attitudes.



%The null results on broader attitudes suggest that more intensive interventions would be needed to change behavior on e.g., housing, year-long interaction, 
%
%
%- maybe won't work when attitudes are very negative
%
%- maybe won't get people to do actions that are more costly... e.g., upsetting the status quo // 
%
%{\color{red}AMPLIFY THE VOCAL MINORITY}


%Research on other stigmatized groups, such as those experiencing homelessness, poverty, or disability, has documented cases where many people appear to be marginal. For example, 

%has investigated indicative cases around such a margin. For example, psychologists have shown that sympathy, pity, and guilt can coexist with stigmatizing attitudes, and motivate action in favor of them, even if they are condescending, as long as this action doesn't upset the status quo. 




%Second, when are endorsements of minorities more persuasive than rejections of them? I show evidence that people often justify their choices of transgender people with pro-social narratives, which may be more persuasive than the practical justifications people cite for not choosing them. This aligns with lab evidence \citep{balafoutasMoralSuasionCharitable2022, hillenbrandAsymmetricEffectNarratives2022} suggesting that pro-social moral suasion may be more change people's behavior. So far, however, we have little understanding of \textit{why} this might be the case, and therefore in which settings we can expect this asymmetry. 

%Second, my results raise the question of why the horizontal communication that reduces discrimination has not already occurred in equilibrium. In the follow-up survey, only \input{../../outputs/stats/prop_conv_trans.tex} of the control group had talked about transgender people in the time since the main survey (even after having gone through the survey itself, which clearly involved transgender people). One possibility is that people are exploiting ``moral wiggle room''.\footnote{See, e.g., \citet{danaExploitingMoralWiggle2007, lazearSortingExperimentsApplication2012, hammanSelfInterestDelegationAdditional2010, danaWhatYouDon2006, andreoniAvoidingAskField2017} for examples of this moral wiggle room effect.} They avoid talking about transgender people in order to avoid having to act pro-socially towards them; they would prefer to act selfishly towards them without making it explicit that they are discriminating. Alternatively, discrimination could come hand-in-hand with a lack of social contact between transgender and non-transgender people, meaning that transgenderism is rarely raised as a topic. Either of these reasons could explain why creating a situation in which transgender workers are explicitly discussed can have large effects on discrimination.


%especially when there are likely to be .
%
%
%In particular, we should investigate further how group dynamics and collective hiring choices may affect discrimination,  %This requires going beyond the conventional economic perspective, which views discrimination primarily through the lens of individual decision-making.












%{\color{red}I focus on two potential such mechanisms: social image concerns that lead people to act in a pro-minority way during a discussion, and the persuasive power of the endorsements and narratives about minorities that can change people's private attitudes.}

%Do people 



%Are the pro-social narratives that surface in the discussion specific to transgender people, or would people also use similar narratives when discussing discrimination towards caste, ethnic, and religious minorities? 


%has seen relatively little investigation


%At the same time, I show suggestive evidence of a prescriptive norm that encourages people to not want to appear to discriminate. 

%First, what are the most promising avenues for scaling up this insight into a policy intervention? 

	
	
	
	
	
			
%		\citep{hillHowQuicklyWe2013}


		




\clearpage


%	\includegraphics{../survey_materials/whatsapp}
	
\renewcommand{\bibfont}{\footnotesize}
	
\begingroup
\footnotesize  % or \scriptsize for even smaller text
\setlength{\bibsep}{0pt plus 0.5ex} % Reduces space between entries
	\bibliography{zotero_library.bib}
\endgroup
	


%\includegraphics{../../../../../Mac (2)/Documents/WhatsApp Image 2021-12-25 at 14.03.42 (3)}
	
	\normalsize
	
	\clearpage
	
	\appendix
	 \setstretch{1.15}
	
%	\linespread{1.2}
%\setstretch{1.2}	
 	
	
\makeatletter
\renewcommand{\thetable}{\Alph{section}\arabic{table}}
\renewcommand\thefigure{\Alph{section}\arabic{figure}}   
\let\c@table\c@figure
\let\ftype@table\ftype@figure % for (2)
\makeatother 
	
	
\clearpage
	\section{Additional tables and figures}
	\setcounter{table}{0}
\setcounter{figure}{0}

\begin{figure}[htbp]
		\centering
		\caption{Experimental design (detailed)}
		\includegraphics[width=\linewidth]{../../outputs/figs/diagram_design_v2.pdf}
		
		\label{fig_main_design_detail}
		
	\end{figure}


\begin{figure}[htbp]
		
		\centering
		\caption{Survey locations}
			\includegraphics[width=\linewidth]{survey_locations.pdf}
		\label{fig_survey_locations}
		\begin{tablenotes}
			\footnotesize
			\item  \textit{Notes}: This shows the location of each survey. Red dots denote surveys from phase 1. Blue dots denote surveys from phase 2. %Hollow bars represent the likelihood of a participants predicting that their neighbor selects a transgender person for a delivery. Filled bars represent the actual probability of choosing a transgender person in the outcome round, restricting to only the choice pairs for which a prediction was made. Only choice-pairs that \textit{do not} include a transgender photo are included.
		\end{tablenotes}
	\end{figure}
	
	
\begin{table}[!htbp]
\caption{Transgender photo recognition confusion matrix}
\label{tab_trans_recog_matrix}
\centering
%\resizebox{\textwidth}{!}{
\input{../../outputs/tables/trans_recog_matrix.tex}
%}
\begin{tablenotes}
\item \footnotesize	\textit{Notes}: * p $<$ 0.1, ** p $<$ 0.05, *** p $<$ 0.01. 
From supplementary data collection that took place in August-September 2022 (N=114). Each participant was shown 14 worker photos. 11 of these were male or female, and 3 were transgender. The participant was asked to select all the photos that were transgender. Transgender photos were recognized as being transgender \input{../../outputs/stats/prop_trans_recog.tex} of the time (332/342), and non-transgender photos were falsely identified as transgender photos only \input{../../outputs/stats/prop_non_trans_false_pos.tex} of the time (15/1254).
\end{tablenotes}
\end{table}





\begin{table}[!h]
\caption{Balance for 3-person discussion (Phases 1 + 2)}
\label{tab_balance_pooled}
\resizebox{\textwidth}{!}{
\input{../../outputs/tables/balance_pooled.tex}
}
\begin{tablenotes}
			\footnotesize
			\item  \textit{Notes}: Columns 1 and 2 show the means of the covariates for the \textit{No discussion (private)} arm and \textit{3-person discussion} arm, including participants from phases 1 and 2. Column 3 shows the $p$-value of a test of the equality of columns 1 and 2. * p $<$ 0.1, ** p $<$ 0.05, *** p $<$ 0.01. The base of the table displays the test statistic and p-value for an F-test for the equality of all covariates across the treatment arms. 
%			\item Only choice-pairs from the outcome round that include a transgender photo are included. The dependent variable in all columns is whether the transgender person was selected. All SDB scores are based on the \citet{crowne1960marlowe} index. In column (1), the SDB score is corrected for acquiescence bias. In column (2), I run a factor analysis on the SDB score, and use the resulting index. In column (3), I use an index calculated using inverse-covariance weights, as seen in  \citet{andersonMultipleInferenceGender2008}. More detail on the construction of the SDB scores is found in Appendix \ref{sec_data_sdb}.
		\end{tablenotes}
\end{table}


\begin{table}[!h]
\caption{Balance for transgender rights videos}
\label{tab_balance_videos}
\resizebox{\textwidth}{!}{
\input{../../outputs/tables/balance_videos.tex}
}
\begin{tablenotes}
			\footnotesize
			\item  \textit{Notes}: Columns 1-3 show the means of the covariates for each of the rights videos arms. Columns 4-5 show the $p$-value of a test of the equality of columns 1-3. * p $<$ 0.1, ** p $<$ 0.05, *** p $<$ 0.01. The base of the table displays the test statistic and p-value for an F-test for the equality of all covariates across the treatment arms.
%			\item Only choice-pairs from the outcome round that include a transgender photo are included. The dependent variable in all columns is whether the transgender person was selected. All SDB scores are based on the \citet{crowne1960marlowe} index. In column (1), the SDB score is corrected for acquiescence bias. In column (2), I run a factor analysis on the SDB score, and use the resulting index. In column (3), I use an index calculated using inverse-covariance weights, as seen in  \citet{andersonMultipleInferenceGender2008}. More detail on the construction of the SDB scores is found in Appendix \ref{sec_data_sdb}.
		\end{tablenotes}
\end{table}

\begin{landscape}
\begin{table}[!htbp]
\caption{Balance for phase 2 discussion arm treatments}
\label{tab_balance_phase2}
\resizebox{1.55\textwidth}{!}{
\input{../../outputs/tables/balance_phase_2.tex}
}
\begin{tablenotes}
			\footnotesize
			\item  \textit{Notes}: Columns 1-4 show the means of the covariates for all discussion-treatment arms in Phase 2. Columns 5-7 show the $p$-value of a test of the equality of columns 1-4. * p $<$ 0.1, ** p $<$ 0.05, *** p $<$ 0.01. The base of the table displays the test statistic and p-value for an F-test for the equality of all covariates across the treatment arms. 
%			\item Only choice-pairs from the outcome round that include a transgender photo are included. The dependent variable in all columns is whether the transgender person was selected. All SDB scores are based on the \citet{crowne1960marlowe} index. In column (1), the SDB score is corrected for acquiescence bias. In column (2), I run a factor analysis on the SDB score, and use the resulting index. In column (3), I use an index calculated using inverse-covariance weights, as seen in  \citet{andersonMultipleInferenceGender2008}. More detail on the construction of the SDB scores is found in Appendix \ref{sec_data_sdb}.
		\end{tablenotes}
\end{table}
\end{landscape}



\clearpage





\begin{table}[!htbp]
\caption{Discussion effects are robust to restricting to control video only}
\label{tab_control_vid_only}
\resizebox{\textwidth}{!}{
\input{../../outputs/tables/control_vid_only.tex}
}
\begin{tablenotes}
\item \footnotesize	\textit{Notes}: Sample includes \textit{only} participants who saw the control video, and excludes participants who saw the rights messaging or legal rights videos. * p $<$ 0.1, ** p $<$ 0.05, *** p $<$ 0.01. Standard errors are clustered at the group-of-3 level and are in parentheses. Randomization inference p-values are in brackets. Unit of observation is the participant $\times$ choice level. Sample includes the \textit{3-person discussion} arm and the \textit{No discussion (private)} arm, in both phase 1 and 2. Column (3) only includes choices that involved a transgender worker. 
In columns (1) and (2), the outcome is whether the \textit{alternative worker} (rather than the male \textit{benchmark worker}) was chosen in the private choices in the \textit{outcome round}. In column (3), it is whether the transgender worker was selected. $\textit{Worker is trans} = 1$ when the alternative worker is transgender, and is 0 when the alternative worker is male or female. The specification used is seen in equation \ref{eqn_main_spec}.  Controls include stratum fixed effects; whether the individual was randomized into being offered 3 deliveries or 1 delivery, or was not part of this randomization; whether the alternative worker was shown on the right; phase fixed effects; and the controls selected by double LASSO (see Section \ref{sec_lasso}). In column (2), controls are interacted with \textit{Worker is trans}, so the coefficient on \textit{Worker is trans} is not shown. Columns (2) and (3) also include controls for the relative \# items offered by the alternative worker, the relative reliability score of the worker, and a dummy for whether the reliability score was shown.
%\item Column (1): the dependent variable is the number of statements the participant agreed with on each of the lists of statements shown to them. Each participant sees both List A and List B, and the anti-trans statement ("In general, if I see a transgender person, I walk away") is randomly included in either List A or List B.
%\item Column (2): the enumerator describes two scenarios, one in which an employer rejects a transgender individual, and another in which a woman avoids a transgender individual on the street. The dependent variable is whether the participant says the person's actions are "wrong".
%\item Column (3): the dependent variable is whether the participant says a worker is likely or very likely to complete a delivery after being shown a photo of the worker.
\end{tablenotes}
\end{table}



\begin{table}[htbp!]
\centering
\caption{Medium-run effects of videos on hypothetical hiring choices (2-9 weeks)}
\label{tab_follow_up_videos}
%\small \textbf{Panel B:} Effect of transgender rights videos (all participants)
\centering
\vspace{-1em}
\normalsize
\resizebox{\textwidth}{!}{
\input{../../outputs/tables/follow_up_video.tex}
}

\footnotesize
\begin{tablenotes} \scriptsize
\item \textit{Notes:} * p $<$ 0.1, ** p $<$ 0.05, *** p $<$ 0.01. Standard errors are clustered at the group-of-3 level and are in parentheses. Randomization inference p-values are in brackets. Sample includes all participants. Controls include dummies for the discussion-arm treatments, as well as the other controls specified in Table \ref{tab_videos}. In the follow-up survey, workers in a pair always had the same reliability score and offered the same number of items. Specification is otherwise the same as Table \ref{tab_videos}.
\end{tablenotes}
\end{table}




%Sample includes all participants in both phases, in all discussion-arm treatments. Column (3) only includes choices that involved a transgender worker. 
%In columns (1) and (2), the outcome is whether the \textit{alternative worker} (rather than the male \textit{benchmark worker}) in the private choices in the \textit{outcome round}. In column (3), it is whether the transgender worker was selected. $\textit{Worker is trans} = 1$ when the alternative worker is transgender, and is 0 when the alternative worker is male or female. 
%The mean of the dependent variable when the worker is trans and in the \textit{No discussion (private)} arm indicates that the transgender worker was selected (rather than the male benchmark worker) \input{../../outputs/stats/p_choose_trans_control.tex} of the time. The mean when the worker is male or female in the \textit{No discussion (private)} arm is above 50\% because participants on average prefer female alternative workers to the male benchmark workers.
%The specification used is seen in equation \ref{eqn_main_spec}.   In column (2), controls are interacted with \textit{Worker is trans}, so the coefficient on \textit{Worker is trans} is not shown. Columns (2) and (3) also include controls for the relative \# items offered by the alternative worker, the relative reliability score of the worker, and a dummy for whether the reliability score was shown. randomization inference \textit{p}-values at the base of the table test for differences between treatment effects across treatment arms, i.e., for differences in the interacted terms in columns (1) and (2), and differences in the uninteracted terms in column (3).
%\item Column (1): the dependent variable is the number of statements the participant agreed with on each of the lists of statements shown to them. Each participant sees both List A and List B, and the anti-trans statement ("In general, if I see a transgender person, I walk away") is randomly included in either List A or List B.
%\item Column (2): the enumerator describes two scenarios, one in which an employer rejects a transgender individual, and another in which a woman avoids a transgender individual on the street. The dependent variable is whether the participant says the person's actions are "wrong".
%\item Column (3): the dependent variable is whether the participant says a worker is likely or very likely to complete a delivery after being shown a photo of the worker.
%\end{tablenotes}
%\end{table}

%
%\begin{figure}[!htbp]
%	
%	\centering
%	\caption{Interactions between rights videos and discussions}
%	\includegraphics[width=\linewidth]{../../outputs/figs/all_6_groups.pdf}
%	\label{fig_bar_vid_discussion_interact}
%	\begin{tablenotes}
%		\footnotesize
%		\item  \textit{Notes}: 95\% confidence intervals are based on standard errors clustered at the group-of-3 level. Unit of observation is the participant $\times$ choice level. Outcome is whether a participant chose the transgender worker in the private outcome round (restricting analysis to only choices with transgender workers). Left panel includes only participants in the \textit{No discussion (private)} and \textit{3-person discussion} arms, in both phases 1 and 2. Right panel in includes all discussion-arm treatments in phase 2 of data collection.
%		\item 
%		%			\item \textit{Panel A}: x-axis is the probability that a given person in the group was marked as speaking positively about transgender workers for a given choice that included a transgender worker.
%		%			Difference between No discussion (private) and 3-person discussion with no positive discussion at all is \input{../../outputs/stats/diff_control_anti_discuss} %%(p=\input{../../outputs/stats/diff_control_anti_discuss_p}).
%		%\item \textit{Panel B}: 
%		%The outcome is whether the participants selected the comparator in the treatment round of their hiring choices.
%	\end{tablenotes}
%\end{figure}





	\begin{figure}[!htbp]
		\centering
		\caption{Inferring WTP to avoid transgender workers}
			\includegraphics[width=0.7\linewidth]{../../outputs/figs/wtp_by_item_diff.pdf}
		\label{fig_wtp_to_avoid}
		\begin{tablenotes}
			\footnotesize
			\item  \textit{Notes}: Points represent the probability of choosing the alternative worker at the given difference in value of items in Rs. Solid lines represent a linear fit. I take the reduction in probability that an option is chosen when a worker is transgender in each treatment group, and divide it by the gradient of selecting an option with respect to item value.
Gradient with respect to item value (pooled across all treatment groups and alternative worker types) is \input{../../outputs/stats/item_value_gradient_raw.tex}, implying that increasing the value of the items offered by an option $A$ by 100 Rs. (relative to the other option $B$ in the pair) increases the probability of a participant selecting $A$ by \input{../../outputs/stats/item_value_gradient_pp.tex} p.p.
 The mean reduction in the probability of choosing the alternative worker when they are trans is \input{../../outputs/stats/baseline_discrim_raw.tex} in the control group, and \input{../../outputs/stats/treatment_discrim_raw.tex} in the discussion group. This corresponds to a willingness to pay to not choose transgender workers of \input{../../outputs/stats/baseline_discrim_raw.tex} / \input{../../outputs/stats/item_value_gradient_raw.tex} = \input{../../outputs/stats/wtp_control.tex} Rs. in the control group that reduces to \input{../../outputs/stats/treatment_discrim_raw.tex} / \input{../../outputs/stats/item_value_gradient_raw.tex} = \input{../../outputs/stats/wtp_treat.tex} Rs. in the discussion group.
		 		\end{tablenotes}
	\end{figure}








	
	
	
	
%	\begin{figure}[!htbp]
%		
%		\centering
%		\caption{Probability of selecting the alternative worker for each gender separately, by participant gender}
%			\includegraphics[width=\linewidth]{../../outputs/figs/r2_by_participant_gender.pdf}
%		\label{fig_gender_by_gender}
%		\begin{tablenotes}
%			\footnotesize
%			\item  \textit{Notes}: Top two panels show results from \textit{female participants}. Bottom two panels show results from \textit{male participants}. The unit of observation is participant $\times$ choice. The sample includes participants in the \textit{No discussion (private)} and \textit{3-person discussion} round. Only choices from the private outcome round are included. The outcome is whether the participant selected the \textit{alternative worker}, who could be male, female, or transgender, instead of the male benchmark worker. Each participant saw two choices where the alternative worker was female, two choices where the alternative worker was male, and two choices where the alternative worker was transgender. 
%			%The outcome is whether the participants selected the comparator in the treatment round of their hiring choices.
%		\end{tablenotes}
%	\end{figure}



\begin{table}[!htbp]
\caption{Dominated and dominating choices: negative discrimination decreases and positive discrimination increases}
\label{tab_dominates}
\centering
\resizebox{0.9\textwidth}{!}{
\input{../../outputs/tables/dominates_table_split_sample.tex}
}
\begin{tablenotes}
			\scriptsize
			\item  \textit{Notes}: %Includes all participants from both phase 1 and 2, apart from listeners. \textit{Discussion (pooled)} = participants in \textit{3-person discussion} arm or speakers in the \textit{2-person discussion} arm. \textit{No discussion (pooled)} = participants in \textit{No discussion (public)} or \textit{No discussion (private)} arm. \input{../../outputs/stats/se_note.tex}
			Unit of observation is participant $\times$ choice. Outcome is whether the participant selects the alternative worker instead of the male benchmark worker. An option (P) (weakly) dominates an option (Q) if it is strictly better on at least one characteristic, and is not worse on any characteristic. More specifically, P weakly dominates Q when (i) P either offers more items than  Q, or P has a higher reliability score than Q (if it is shown), or both; \textit{and} (ii) Q does not offer more items than P, \textit{and} (iii) Q does not have a higher reliability score than P (if it is shown). \textit{Trans dominates} is when the transgender worker weakly dominates the other option. \textit{Trans is dominated} is when the transgender worker is weakly dominated by the other option. \textit{Neither dominates} is when neither the transgender worker nor the other option dominates.
			 Controls include stratum fixed effects; dummies for the rights videos;  whether the individual was randomized into being offered 3 deliveries or 1 delivery, or was not part of this randomization; phase fixed effects; whether the alternative worker was shown on the right; and the controls selected by double LASSO (see Section \ref{sec_lasso}).%			\item Only choice-pairs from the outcome round that include a transgender photo are included. The dependent variable in all columns is whether the transgender person was selected. All SDB scores are based on the \citet{crowne1960marlowe} index. In column (1), the SDB score is corrected for acquiescence bias. In column (2), I run a factor analysis on the SDB score, and use the resulting index. In column (3), I use an index calculated using inverse-covariance weights, as seen in  \citet{andersonMultipleInferenceGender2008}. More detail on the construction of the SDB scores is found in Appendix \ref{sec_data_sdb}.
		\end{tablenotes}
\end{table}



	\begin{figure}[!htb]
		
		\centering
		\caption{Predictions about group members' private choices depend on positivity of discussion}
			\includegraphics[width=\linewidth]{../../outputs/figs/het_by_posneg_discussion_predictions.pdf}
		\label{het_by_posneg_discussion_predictions}
		\begin{tablenotes}
			\item \vspace{-1.5em} \footnotesize \textit{Notes}: \textit{P(Other group members spoke trans)} (x-axis) is the proportion of (discussion choice $\times$ participants) that spoke positively about a transgender option. There were two other group members, each with two opportunities to speak positively about a transgender worker. \textit{P(Predicted group would choose trans)} (y-axis) is the predicted probability that a fellow group member would choose the transgender worker in the private outcome round. Plotted line of best fit and 95\% confidence intervals are based on raw data with no controls. Left panel uses 3-person discussion participants in phases 1 and 2; right panel uses listeners in phase 2. Slope and p-values come from models with LASSO controls and all other controls in \autoref{tab_main}. $\beta$=0.36 implies that each additional positive discussion from a group member corresponds to being $36/4=9$ p.p. more likely to select a transgender worker in the outcome round. \textit{$H_0 ($intercept=control$)$} tests whether a discussion participant for whom \textit{P(Other group members spoke trans) = 0} exhibits different post-discussion discrimination from the control group.	%\footnotesize \textit{Notes}: Coefficient in black is using model with LASSO, all other controls in Table 1; Plotted slope is regression with no controls, standard errors clustered at group-of-3 level. \textcolor{red}{What is P(Other group members spoke pro-trans)}. There are two other group members, each with two opportunities to speak positively about a transgender worker, so $\beta$=0.28 implies that each additional positive discussion corresponds to being $28/4=$\input{../../outputs/stats/het_by_posneg_discussion_full_coeff.tex} p.p. more likely to select a transgender worker in the outcome round.
			%Sample includes all participants in the \textit{3-person discussion} arm and the \textit{No discussion (private)} arm, in both phases.  Unit of observation is participant $\times$ prediction. Only choices that include a transgender photo are included. Hollow bars represent the probability that a participant predicts that their group-member selects a transgender delivery worker. The prediction was incentivized. Each participant made 2 predictions (one involving a transgender worker) for each of their 2 group members. The two predictions involving a transgender worker are included for analysis. Filled bars represent the actual probability that participants select a transgender worker in the outcome round (restricting to only choices for which another group member made a prediction).
		\end{tablenotes}
	\end{figure}
	
	
	

\begin{figure}[!htbp]
		
		\centering
		\caption{Probability of selecting the alternative worker for each gender separately}
			\includegraphics[width=0.8\linewidth]{../../outputs/figs/r2_summ_mf.pdf}
		\label{fig_gender}
		\begin{tablenotes}
			\footnotesize
			\item  \textit{Notes}: The unit of observation is participant $\times$ choice. The sample includes participants in the \textit{No discussion (private)} and \textit{3-person discussion} arms. Only choices from the private outcome round are included. The outcome is whether the participant selected the \textit{alternative worker}, who could be male, female, or transgender, instead of the male benchmark worker. Each participant saw two choices where the alternative worker was female, two choices where the alternative worker was male, and two choices where the alternative worker was transgender.%The outcome is whether the participants selected the comparator in the treatment round of their hiring choices.
		\end{tablenotes}
	\end{figure}
	
%	\includegraphics{../../treat_by_photo_clean}
	








\begin{table}[!htbp]
\caption{Sentence-level transcript analysis using k-means clustering}
\label{transcript_cluster_quotes}
\centering
\resizebox{1\textwidth}{!}{
\input{../../outputs/tables/transcript_cluster_quotes.tex}
}
\begin{tablenotes}
\item \footnotesize \textit{Notes}: All sentences in the transcripts are clustered into 20 semantically similar clusters using OpenAI's embeddings and k-means clustering (Appendix \ref{sec_appendix_transcript_sentence}). Each row represents a cluster. 3 representative quotes are chosen that have the greatest cosine similarity to the cluster mean. \textit{\% sentences in the discussion} denotes the proportion of sentences that belong to that cluster, for discussions without a transgender option (column 3), and for discussions with a transgender option (column 4). Column 5 is the coefficient from a regression of \textit{P(chose trans in outcome round)} on the group-level proportion of utterances in that cluster for transgender conversations. For this regression the proportion of utterances is standardized so it can be interpreted in standard deviations (e.g., if a group is 1 SD more likely to utter statements in cluster 1, this is associated with a 6.8 p.p. increase in the probability of selecting transgender workers). $q$-values are the \citet{andersonMultipleInferenceGender2008} multiple-hypothesis-adjusted p-values. Rows are ordered by ascending $q$-values. Correlations with non-trans quotes are all non-significant (all $q$-values=1). 
%\item 
%
%\textcolor{red}{This is the proportion of sentences in the transcript (each sentence is a line said by one person) that is in that cluster. }\textcolor{red}{$\beta$ here is the correlation between P(in that cluster | trans). P() is standardized so can be interpreted in standard deviation coefficients (i.e. if group is 1 SD more likely to utter statements in cluster 1, effect is XXX). Correlation with non-trans quotes are all non-significant (all q-values=1).}	\textcolor{red}{Representative quotes are the 3 sentences actually said by a respondent with the greatest cosine similirity to the cluster's mean embedding.} Ordered by q-value.
\end{tablenotes}
\end{table}



	\begin{figure}[t]
		\centering
		\caption{Transcript-level features that predict reductions in discrimination}
		\label{fig_hypothesis_effects_fct}
			\includegraphics[width=1\linewidth]{../../outputs/figs/hypothesis_effects_fct}
\begin{tablenotes}
			\footnotesize
			\item  \textit{Notes}: AI generated 500 hypotheses about the differences between random pairs of transcripts, and then rated every transcript according to how much it fulfilled each hypothesis on a scale of 1--10 (see Appendix \ref{sec_appendix_transcript_sentence}). I reduce the dimensionality of the 500 hypotheses using factor analysis to 11 factors. Each hypothesis ``type'' is represented by the specific hypothesis that loads most highly on that type (the labels in the y-axis). I take the average rating at the group level for both discussions without a transgender option and with a transgender option, and then regress the probability of selecting a transgender worker in the outcome round on these two averages. There is one regression for each hypothesis type. Coefficients and 95\% CIs are shown for both regressors. All regressions include all main controls from column 3 of \autoref{tab_main}. * $q$ $<$ 0.1, ** $q$$<$ 0.05, *** $q$$<$ 0.01, where $q$ are the multiple-hypothesis-adjusted p-values, taking all 40 coefficients together as a set for adjustment \citep{andersonMultipleInferenceGender2008}.
				\end{tablenotes}
	\end{figure}




%
%\begin{figure}[!htbp]
%	
%	\centering
%	\caption{Videos about transgender rights cause more pro-social reasons in the discussion}
%	\includegraphics[width=\linewidth]{../../outputs/figs/reasons_r1_by_video.pdf}
%	\label{fig_reasons_r1_by_video}
%	\begin{tablenotes}
%		\footnotesize
%		\item  \textit{Notes}: Unit of observation is a group $\times$ choice. Sample is the \textit{3-person discussion} arm in phases 1 and 2, and the \textit{2-person discussion} arm in phase 2. Confidence intervals are based on a bootstrapped binomial distribution. One enumerator observed the discussion and marked the main reasons that the participants said they were selecting the chosen option during the discussion. \textit{Gender} includes saying that the worker is transgender, male, or female. \textit{Pro-social} reasons include (i) wanting to give an opportunity or help the worker, (ii) saying that the worker is also human, (iii) saying that the chosen worker seems poor, (iv) saying "We shouldn't discriminate". \textit{Items} is when participants say they chose the option because it offered more items. \textit{Worker} includes saying (i) it would be easy to talk with the worker, (ii) the choice is based on how the worker looks / the photo, (iii) the worker seeming reliable, (iv) the worker seeming friendly, (v) it being easy to relate to the worker, (vi) the perceived age of the worker. \textit{Worker details} includes reasons based on written details on the worker profile: (i) the reliability score, (ii) whether they speak English, (iii)  their experience, or (iv) their education. \textit{Negative} is when the reason cited is a negative comment about the worker that was not chosen (e.g., the other person looks scary or indecent).
%		%			\item \textit{Panel A}: x-axis is the probability that a given person in the group was marked as speaking positively about transgender workers for a given choice that included a transgender worker.
%		%			Difference between No discussion (private) and 3-person discussion with no positive discussion at all is \input{../../outputs/stats/diff_control_anti_discuss} %%(p=\input{../../outputs/stats/diff_control_anti_discuss_p}).
%		%\item \textit{Panel B}: 
%		%The outcome is whether the participants selected the comparator in the treatment round of their hiring choices.
%	\end{tablenotes}
%\end{figure}





%
%
%
%\begin{table}[!htbp]
%\caption{Effect of rights video on predictions about others}
%\label{tab_videos_norms}
%\resizebox{\textwidth}{!}{
%\input{../../outputs/tables/norms_videos.tex}
%}
%\begin{tablenotes}
%\item \footnotesize	\textit{Notes}: * p $<$ 0.1, ** p $<$ 0.05, *** p $<$ 0.01. Standard errors are clustered at the group-of-3 level and are in parentheses. Standard p-values are in brackets. Sample includes all participants in both phases.
%\item Column (1): The unit of observation is the participant. The dependent variable is the incentivized prediction of the proportion of other people (how many out of 20) in the study who pick a transgender person to receive a delivery when shown a specific pair of workers. Each participant makes 3 incentivized predictions, one of which includes a transgender worker. Only the choice involving the transgender worker is included for analysis. Column (2): The unit of observation is the participant $\times$ prediction. The dependent variable is whether the participant predicted that another person in their group selected a transgender worker in the private outcome round. The prediction is incentivized. Each participant made 2 predictions (one involving a transgender worker) for each of their 2 group members. The two predictions involving a transgender worker are included for analysis. Controls include stratum fixed effects; dummies for the discussion-arm treatments; phase fixed effects; and the controls selected by double LASSO (see Section \ref{sec_lasso}). 
%%\item Column (1): the dependent variable is the number of statements the participant agreed with on each of the lists of statements shown to them. Each participant sees both List A and List B, and the anti-trans statement ("In general, if I see a transgender person, I walk away") is randomly included in either List A or List B.
%%\item Column (2): the enumerator describes two scenarios, one in which an employer rejects a transgender individual, and another in which a woman avoids a transgender individual on the street. The dependent variable is whether the participant says the person's actions are "wrong".
%%\item Column (3): the dependent variable is whether the participant says a worker is likely or very likely to complete a delivery after being shown a photo of the worker.
%\end{tablenotes}
%\end{table}
%
%\begin{table}[!htbp]
%\caption{Effect of trans rights videos on attitudes and beliefs}
%\label{tab_videos_attitudes}
%\resizebox{\textwidth}{!}{
%\input{../../outputs/tables/attitudes_videos.tex}
%}
%\begin{tablenotes}
%\item \footnotesize	\textit{Notes}: * p $<$ 0.1, ** p $<$ 0.05, *** p $<$ 0.01. Standard errors are clustered at the group-of-3 level and are in parentheses. Standard p-values are in brackets. Unit of observation is the participant $\times$ choice level. Sample includes all participants in both phases. \textit{Column (1)}: dependent variable is the number of statements the participant agreed with on each of the lists of statements shown to them. Each participant sees both List A and List B, and the anti-trans statement ("In general, if I see a transgender person, I walk away") is randomly included in either List A or List B. \textit{Question FEs} is a fixed effect for List B. \textit{Column (2)}: the enumerator describes two scenarios, one in which an employer rejects a transgender individual, and another in which a woman avoids a transgender individual on the street. The dependent variable is whether the participant says the person's actions are "wrong". \textit{Question FEs} is a fixed effect for the second scenario. \textit{Column (3)}: the dependent variable is whether the participant says a worker is likely or very likely to complete a delivery after being shown a photo of the worker. Participants make two choices each, one of which includes a transgender photo. The order is randomized. \textit{Question FEs} controls for the order of the choice.
%%In columns (1) and (2), the outcome is whether the \textit{alternative worker} (rather than the male \textit{benchmark worker}) in the private choices in the \textit{outcome round}. In column (3), it is whether the transgender worker was selected. $\textit{Worker is trans} = 1$ when the alternative worker is transgender, and is 0 when the alternative worker is male or female. 
%%The mean of the dependent variable when the worker is trans and in the \textit{No discussion (private)} arm indicates that the transgender worker was selected (rather than the male benchmark worker) \input{../../outputs/stats/p_choose_trans_control.tex} of the time. The mean when the worker is male or female in the \textit{No discussion (private)} arm is above 50\% because participants on average prefer female alternative workers to the male benchmark workers.
%%The specification used is seen in equation \ref{eqn_main_spec}.  
%\item Controls include stratum fixed effects; dummies for the discussion-arm treatments; phase fixed effects; and the controls selected by double LASSO (see Section \ref{sec_lasso}). 
%
%%In column (2), controls are interacted with \textit{Worker is trans}, so the coefficient on \textit{Worker is trans} is not shown. Columns (2) and (3) also include controls for the relative \# items offered by the alternative worker, the relative reliability score of the worker, and a dummy for whether the reliability score was shown. randomization inference \textit{p}-values at the base of the table test for differences between treatment effects across treatment arms, i.e., for differences in the interacted terms in columns (1) and (2), and differences in the uninteracted terms in column (3).
%%\item Column (1): the dependent variable is the number of statements the participant agreed with on each of the lists of statements shown to them. Each participant sees both List A and List B, and the anti-trans statement ("In general, if I see a transgender person, I walk away") is randomly included in either List A or List B.
%%\item Column (2): the enumerator describes two scenarios, one in which an employer rejects a transgender individual, and another in which a woman avoids a transgender individual on the street. The dependent variable is whether the participant says the person's actions are "wrong".
%%\item Column (3): the dependent variable is whether the participant says a worker is likely or very likely to complete a delivery after being shown a photo of the worker.
%\end{tablenotes}
%\end{table}


\begin{table}[!htbp]
\caption{No evidence of differential attrition}
\label{tab_diff_attrition}
\resizebox{\textwidth}{!}{
\input{../../outputs/tables/differential_attrition.tex}
	}
	\begin{tablenotes}

\item \footnotesize	\textit{Notes}:  * p $<$ 0.1, ** p $<$ 0.05, *** p $<$ 0.01. Standard errors are clustered at the group-of-3 level and are in parentheses. Standard p-values are in brackets. Unit of observation is the participant. Dependent variable is whether the follow-up survey was completed. Column (1) includes only participants in the \textit{No discussion (private)} or the \textit{3-person discussion} arms, in both phases. Column (2) includes all participants in phase 2. Column (3) includes all participants in both phases.
% NOTE(review): orphaned fragment moved out of the printed note: "includes choices that involved a transgender worker." It has no subject and conflicts with the participant-level unit of observation stated above; restore with an explicit subject if it is intended.
\end{tablenotes}
\end{table}


%	\begin{figure}[!htbp]
%		
%		\centering
%		\caption{Predictions about group members' choices become more accurate in treatment arms, specifically for transgender workers}
%			\includegraphics[width=1\linewidth]{../../outputs/figs/group_predic_accuracy.pdf}
%		\label{fig_group_predic_accuracy}
%		\begin{tablenotes}
%			\footnotesize
%			\item  \textit{Notes}:  Unit of observation is the participant $\times$ prediction level. Outcome is the probability that a participant correctly predicts the specific choice made in the private outcome round by their fellow group member. The group predictions are incentivized. Confidence intervals are based on standard errors clustered at the group-of-3 level.
%%			\item  %Hollow bars represent the likelihood of a participants predicting that their neighbor selects a transgender person for a delivery. Filled bars represent the actual probability of choosing a transgender person in the outcome round, restricting to only the choice pairs for which a prediction was made. Only choice-pairs that \textit{do not} include a transgender photo are included.
%		\end{tablenotes}
%	\end{figure}
	





%
%\begin{figure}[t]
%		
%		\centering
%		\caption{Summary of results: effect of all interacted treatment arms on probability of selecting worker in the outcome round}
%			\includegraphics[width=1\linewidth]{../../outputs/figs/big_summary_fig.pdf}
%		\label{fig_big_summary_fig}
%		\begin{tablenotes}
%			\footnotesize
%			\item  \textit{Notes}: Shows the probability of selecting a worker in the outcome round, relative to the probability of selecting a transgender worker in the \textit{No discussion (private)}, \textit{Control video} arm. Panels (a) to (e) show the probability of selecting a transgender worker in each treatment arm. Panel (f) shows the probability of selecting a non-transgender \textit{alternative} worker, pooling all treatment arms. 95\% confidence intervals are based on standard errors clustered at the group-of-3 level. Controls include stratum fixed effects; whether individual was randomized into being offered 3 deliveries or 1 delivery, or was not part of this randomization; whether the alternative worker was shown on the right; phase fixed effects; relative number of items offered; relative reliability score; whether the relative reliability score was shown; and the controls selected by double LASSO (see Section \ref{sec_lasso}). Unit of observation is the participant $\times$ choice level. %As a placebo test, the effect of each treatment arm on the probability of selecting \textit{non-transgender} alternative workers is seen in Appendix \autoref{fig_big_summary_fig_placebo}.
%%			This graphically shows the main results from Table \ref{tab_main}. The y-axis shows the effect on the probability of choosing the \textit{alternative} worker relative to the case where \textit{Worker is M/F} in the \textit{No discussion (private)} arm. Confidence intervals are based on column (1) of Table \ref{tab_main}, based on standard errors clustered at the group-of-3 level.
%		\end{tablenotes}
%	\end{figure}


	
	

	\begin{table}[!htbp]
\caption{Treatment round choices (3-person discussion sample, Phases 1 and 2)}
\label{tab_r1_main}
\resizebox{\textwidth}{!}{
\input{../../outputs/tables/r1_main.tex}
}
\begin{tablenotes}
\item	\scriptsize \textit{Notes}: * p $<$ 0.1, ** p $<$ 0.05, *** p $<$ 0.01. Standard errors are clustered at the group-of-3 level and are in parentheses. Randomization inference p-values are in brackets. 
%Unit of observation is the participant $\times$ choice level. 
Sample includes the \textit{3-person discussion} arm and the \textit{No discussion (private)} arm, in both phase 1 and 2. 
%Column (3) only includes choices that involved a transgender worker. 
%In columns (1) and (2), the outcome is whether the \textit{alternative worker} was selected in the \textit{treatment round} rather than the male \textit{benchmark worker} during the \textit{treatment round} (i.e. during the discussion for those in the 3-person discussion arm). In column (3), it is whether the transgender worker was selected. $\textit{Worker is trans} = 1$ when the alternative worker is transgender, and is 0 when the alternative worker is male or female. 
%The mean of the dependent variable when the worker is trans and in the \textit{No discussion (private)} arm indicates that the transgender worker was selected (rather than the male benchmark worker) \input{../../outputs/stats/p_choose_trans_control.tex} of the time. The mean when the worker is male or female in the \textit{No discussion (private)} arm is above 50\% because participants on average prefer female alternative workers to the male benchmark workers.
The outcomes are based on \textit{treatment round choices}, i.e., during the discussion in the 3-person discussion arm. 
The specification is given in Equation \ref{eqn_main_spec}, and is otherwise the same as in Tables \ref{tab_main} and \ref{tab_videos}.  
%Controls include stratum fixed effects; dummies for the rights videos; whether the individual was randomized into being offered 3 deliveries or 1 delivery, or was not part of this randomization; whether the alternative worker was shown on the right; phase fixed effects; and the controls selected by double LASSO (see Section \ref{sec_lasso}). In column (2), controls are interacted with \textit{Worker is trans}, so the coefficient on \textit{Worker is trans} is not shown. Columns (2) and (3) also include controls for the relative number of items offered and the relative reliability score (which was always shown in the treatment round).
\end{tablenotes}
\end{table}

%
%	\begin{figure}[!htbp]
%	
%	\centering
%	\caption{Participants in the discussion are more likely to say positive statements about transgender workers than negative statements}
%	\includegraphics[width=0.45\linewidth]{../../outputs/figs/prob_positive_mentions.pdf}
%	\label{fig_prob_positive_mentions}
%	\begin{tablenotes}
%		\footnotesize
%		\item  \textit{Notes}:\textcolor{red}{ Unit of observation is the participant $\times$ choice level. Only choices that include a transgender worker are included. Enumerators coded for each choice whether each participant said a positive statement about the transgender worker, a negative statement about the worker, or both.
%	Participants are \input{../../outputs/stats/ratio_pos_neg_mentions.tex}x (= \input{../../outputs/stats/mean_pos_mentions.tex}/\input{../../outputs/stats/mean_neg_mentions.tex}) more likely to say a positive statement rather than a negative statement about transgender workers in the discussion. Sample used is the \textit{3-person discussion} arm only, in both phase 1 and phase 2.}  %The outcome is whether the participants selected the comparator in the treatment round of their hiring choices.
%	\end{tablenotes}
%\end{figure}


\begin{figure}[htbp]
		\centering
		\caption{Individual-level estimates of willingness to pay to choose a transgender worker in the outcome round}
		\includegraphics[width=\linewidth]{../../outputs/figs/pref_distribution.pdf}
		
		\label{pref_distribution}
		
		\begin{tablenotes}
\footnotesize \item \textit{Notes}: Plots the distribution of individual-level estimates of willingness to pay (WTP) to select a transgender worker. Estimates come from a version of Equation \ref{eqn_main_spec} that includes an interaction of individual fixed effects $\gamma_i$ and $Trans_{ijk}$. WTP is calculated in monetary terms by dividing $\gamma_i$ by the coefficient on relative item value (in Rupees) across the two options. Estimates cluster around 3 peaks corresponding to choosing a transgender worker 2/2 times, 1/2 times, or 0/2 times. Variation between these peaks is driven by variation in the items offered and the characteristics of the worker (e.g., experience, reliability score).	\end{tablenotes}
	\end{figure}


\begin{landscape}
	





\begin{figure}[!htbp]
		
		\centering
		\caption{Predictions about others in group}
			\includegraphics[width=\linewidth]{../../outputs/figs/group_predic_phase_2.pdf}
		\label{fig_group_predic_phase2}
		\begin{tablenotes}
			\footnotesize
			\item  \textit{Notes}: 		Sample includes all participants.  Unit of observation is participant $\times$ prediction. Only choices that include a transgender photo are included. Hollow bars represent the probability that a participant predicts that their group member selects a transgender delivery worker. The prediction was incentivized. Each participant made 2 predictions (one involving a transgender worker) for each of their 2 group members. The two predictions involving a transgender worker are included for analysis. Filled bars represent the actual probability that participants select a transgender worker in the outcome round (restricting to only choices for which another group member made a prediction). \textit{2-person discussion (predictions about speakers)} includes all predictions made \textit{about} the private choices of the speakers in the discussion. \textit{2-person discussion (predictions about listeners)} includes all predictions made \textit{about} the private choices of the people who just listened to the discussion. %Discussion speakers (pooling 2-person and 3-person discussions) are predicted to choose transgender workers \input{../../outputs/stats/diff_group_predic_listener_vs_speakers.tex} p.p. more than discussion listeners ($p$=\input{../../outputs/stats/p_val_group_predic_listener_vs_speakers.tex}). 

			%			\item \textit{Panel A}: x-axis is the probability that a given person in the group was marked as speaking positively about transgender workers for a given choice that included a transgender worker.
%			Difference between No discussion (private) and 3-person discussion with no positive discussion at all is \input{../../outputs/stats/diff_control_anti_discuss} %(p=\input{../../outputs/stats/diff_control_anti_discuss_p}).
%\item \textit{Panel B}: 
			%The outcome is whether the participants selected the comparator in the treatment round of their hiring choices.
		\end{tablenotes}
	\end{figure}
	
	\end{landscape}

\begin{table}[!htbp]
\caption{Mediation analysis of discussion using mechanism outcomes}
\label{tab_mediation}
\resizebox{\textwidth}{!}{
\input{../../outputs/tables/mediation.tex}
	}
	\begin{tablenotes}

\item \footnotesize	\textit{Notes}:  * p $<$ 0.1, ** p $<$ 0.05, *** p $<$ 0.01. Standard errors are clustered at the group-of-3 level and are in parentheses. Standard p-values are in brackets. Unit of observation is the participant $\times$ choice level. Sample includes the \textit{3-person discussion} arm and the \textit{No discussion (private)} arm, in both phase 1 and 2. Only choices involving a transgender worker are included. The dependent variable is whether the transgender worker was selected in the private outcome round choices. Additional variables are based on the mechanism outcomes described in Appendix \ref{sec_data}.
 Controls include stratum fixed effects; dummies for the rights videos; whether the individual was randomized into being offered 3 deliveries or 1 delivery, or was not part of this randomization; whether the alternative worker was shown on the right; phase fixed effects; relative reliability score; relative items offered; whether the reliability score was shown; and the controls selected by double LASSO (see Section \ref{sec_lasso}).
\end{tablenotes}
\end{table}

	





\begin{table}[!htbp]
\caption{Effects on hypothetical choices after 2--9 weeks for mechanism treatments}
\label{tab_mechs_follow_up}
\centering
\resizebox{0.8\textwidth}{!}{
\input{../../outputs/tables/mechs_follow_up.tex}
	}
	\begin{tablenotes}

\item \footnotesize	\textit{Notes}:  * p $<$ 0.1, ** p $<$ 0.05, *** p $<$ 0.01. Standard errors are clustered at the group-of-3 level and are in parentheses. Standard p-values are in brackets. Unit of observation is the participant $\times$ choice level. Sample includes all participants in phase 2. Outcome is whether the transgender worker was chosen; only pairs involving a transgender worker are included. 
%The mean of the dependent variable when the worker is trans and in the \textit{No discussion (private)} arm indicates that the transgender worker was selected (rather than the male benchmark worker) \input{../../outputs/stats/p_choose_trans_control.tex} of the time. The mean when the worker is male or female in the \textit{No discussion (private)} arm is above 50\% because participants on average prefer female alternative workers to the male benchmark workers.
 Controls include stratum fixed effects; dummies for the discussion-arm treatments; whether the alternative worker was shown on the right; phase fixed effects; and the controls selected by double LASSO (see Section \ref{sec_lasso}).
\end{tablenotes}
\end{table}




\begin{table}[!htbp]
	\caption{Correlation between dominance in discussion and post-discussion pro-trans choices (3-person discussion arm only)}
	\label{tab_discussion_dominance}
	\centering
	\resizebox{\textwidth}{!}{
		\input{../../outputs/tables/discussion_dominance.tex}
	}
	\begin{tablenotes}
%		\item \footnotesize	\textit{Notes}: * p $<$ 0.1, ** p $<$ 0.05, *** p $<$ 0.01. 
%		\item Controls include items and quality stuff
%		\item 
		
		\item	\scriptsize \textit{Notes}: 
		\textit{P(spoke first)} is the probability that a participant spoke first in their group in the discussion of a choice, as marked by enumerator observations. The mean is 33\%. \textit{P(dominated)} is the probability that a participant dominated the discussion of a choice, as marked by enumerator observations. The mean is \input{../../outputs/stats/mean_dominant} (more than one participant can be marked as dominating). \textit{Dominance index (Z)} is the sum of normalized (Z-index) values for \textit{P(spoke first)} and \textit{P(dominated)}. Only the \textit{3-person discussion} arm is included. * p $<$ 0.1, ** p $<$ 0.05, *** p $<$ 0.01. Standard errors are clustered at the group-of-3 level and are in parentheses. p-values are in brackets. Unit of observation is the participant $\times$ choice level. Outcome is whether the transgender worker was selected in the private outcome round (i.e., after the discussion).  
Controls include stratum fixed effects; dummies for the rights videos; whether the alternative worker was shown on the right; the relative \# items offered by the alternative worker; the relative reliability score of the worker; and a dummy for whether the reliability score was shown.
%In column (2), controls are interacted with \textit{Worker is trans}, so the coefficient on \textit{Worker is trans} is not shown. Columns (2) and (3) also include controls for the relative \# items offered by the alternative worker, the relative reliability score of the worker, and a dummy for whether the reliability score was shown. randomization inference \textit{p}-values at the base of the table test for differences between treatment effects across treatment arms, i.e., for differences in the interacted terms in columns (1) and (2), and differences in the uninteracted terms in column (3).
		%\item Column (1): the dependent variable is the number of statements the participant agreed with on each of the lists of statements shown to them. Each participant sees both List A and List B, and the anti-trans statement ("In general, if I see a transgender person, I walk away") is randomly included in either List A or List B.
		%\item Column (2): the enumerator describes two scenarios, one in which an employer rejects a transgender individual, and another in which a woman avoids a transgender individual on the street. The dependent variable is whether the participant says the person's actions are "wrong".
		%\item Column (3): the dependent variable is whether the participant says a worker is likely or very likely to complete a delivery after being shown a photo of the worker.
	\end{tablenotes}
\end{table}



	\begin{figure}[!htb]
		
		\centering
		\caption{Motivations during the discussion}
			\includegraphics[width=0.8\linewidth]{../../outputs/figs/debrief_spoke_differences_bar}
		\label{debrief_spoke_differences_bar}
		\begin{tablenotes}
			\item 	\footnotesize \textit{Notes}: \input{../../outputs/stats/spoke_pro_trans_mean.tex} of participants in the discussion spoke in favor of trans workers for both choices. $p$-value is calculated using a regression controlling for stratum fixed effects and rights videos, and clustering standard errors at the group level. Phase 1 only, 3-person discussion participants, $N$=\input{../../outputs/stats/spoke_pro_trans_nobs.tex}.
			%Coefficient in black is using model with LASSO, all other controls in Table 1; Plotted slope is regression with no controls, standard errors clustered at group-of-3 level. \textcolor{red}{What is P(Other group members spoke pro-trans)}
			%Sample includes all participants in the \textit{3-person discussion} arm and the \textit{No discussion (private)} arm, in both phases.  Unit of observation is participant $\times$ prediction. Only choices that include a transgender photo are included. Hollow bars represent the probability that a participant predicts that their group-member selects a transgender delivery worker. The prediction was incentivized. Each participant made 2 predictions (one involving a transgender worker) for each of their 2 group members. The two predictions involving a transgender worker are included for analysis. Filled bars represent the actual probability that participants select a transgender worker in the outcome round (restricting to only choices for which another group member made a prediction).
		\end{tablenotes}
	\end{figure}


\begin{figure}[!htbp]
		
		\centering
		\caption{Reasons cited in the 3-person discussions (enumerator observations)}
			\includegraphics[width=0.75\linewidth]{../../outputs/figs/reasons_r1_categories.pdf}
		\label{fig_reasons_r1_categories}
		\begin{tablenotes}
			\footnotesize
			\item  \textit{Notes}: Unit of observation is a group $\times$ choice. Sample is the \textit{3-person discussion} arm in both phase 1 and 2. Confidence intervals are based on a bootstrapped binomial distribution. One enumerator observed the discussion and marked the main reasons that the participants said they were selecting the chosen option during the discussion. \textit{Gender} includes saying that the worker is transgender, male, or female. \textit{Pro-social} reasons include (i) wanting to give an opportunity or help the worker, (ii) saying that the worker is also human, (iii) saying that the chosen worker seems poor, (iv) saying ``We shouldn't discriminate''. \textit{Items} is when participants say they chose the option because it offered more items. \textit{Worker} includes saying (i) it would be easy to talk with the worker, (ii) the choice is based on how the worker looks / the photo, (iii) the worker seeming reliable, (iv) the worker seeming friendly, (v) it being easy to relate to the worker, (vi) the perceived age of the worker. \textit{Worker details} includes reasons based on written details on the worker profile: (i) the reliability score, (ii) whether they speak English, (iii)  their experience, or (iv) their education. \textit{Negative} is when the reason cited is a negative comment about the worker that was not chosen (e.g., the other person looks scary or indecent). %Hollow bars represent the likelihood of a participants predicting that their neighbor selects a transgender person for a delivery. Filled bars represent the actual probability of choosing a transgender person in the outcome round, restricting to only the choice pairs for which a prediction was made. Only choice-pairs that \textit{do not} include a transgender photo are included.
		\end{tablenotes}
	\end{figure}


%	\begin{figure}[!htbp]
%		
%		\centering
%		\caption{Reasons cited in the group discussion}
%			\includegraphics[width=\linewidth]{../../outputs/figs/reasons_r1_full.pdf}
%		\label{fig_discussion_reasons}
%		\begin{tablenotes}
%			\footnotesize
%			\item  \textit{Notes}: Unit of observation is a group $\times$ choice. Sample is the \textit{3-person discussion} arm in both phase 1 and 2.  One enumerator observed the discussion and marked the main reasons that the participants said they were selecting the chosen option during the discussion. Outcome is the proportion of choices during the discussion during which each reason was cited as the main reason for their choice.
%		\end{tablenotes}
%	\end{figure}








	


	
%\begin{figure}[!htbp]
%		
%		\centering
%		\caption{Retrospective reasons for choices in outcome round (disaggregated)}
%			\includegraphics[width=\linewidth]{../../outputs/figs/reasons_for_choices_r2.pdf}
%		\label{fig_reasons_for_choices_r2}
%		\begin{tablenotes}
%			\footnotesize
%			\item  \textit{Notes}: In phase 2, after participants finished the full set of hiring choices, they were asked \textit{why} they chose 4 randomly selected  options in the outcome round. Unit of observation is the participant $\times$ choice $\times$ reason. Outcome is whether the reason was given when asked why the participant made their choice for a given pair of options from the outcome round.  \textit{No discussion (pooled)} includes all participants in the \textit{No discussion (private)} and \textit{No discussion (public)} arms. \textit{Discussion (pooled)} includes speakers in the \textit{2-person discussion} and \textit{3-person discussion} arms, and does not include listeners. Confidence intervals are calculated based on standard errors clustered at the group-of-3 level. Left panel includes only choices that did not include a transgender worker. Right panel includes only choices that included a transgender worker.
%			\item 
%			%			\item \textit{Panel A}: x-axis is the probability that a given person in the group was marked as speaking positively about transgender workers for a given choice that included a transgender worker.
%%			Difference between No discussion (private) and 3-person discussion with no positive discussion at all is \input{../../outputs/stats/diff_control_anti_discuss} %%(p=\input{../../outputs/stats/diff_control_anti_discuss_p}).
%%\item \textit{Panel B}: 
%			%The outcome is whether the participants selected the comparator in the treatment round of their hiring choices.
%		\end{tablenotes}
%	\end{figure}
		
		
		
%	
%	\begin{table}[!htbp]
%\caption{Positive correlation between a reason being cited in a discussion and reasons for choices in the outcome round}
%\label{tab_r1_r2_reasons_corr}
%\resizebox{\textwidth}{!}{
%\input{../../outputs/tables/r1_r2_reasons_corr.tex}
%}
%\begin{tablenotes}
%\item \footnotesize	\textit{Notes}: * p $<$ 0.1, ** p $<$ 0.05, *** p $<$ 0.01. Standard errors are clustered at the group-of-3 level. $p$-values are in brackets (not using randomization inference). Sample includes all participants in the \textit{3-person discussion} and \textit{2-person discussion} arm, only in phase 2 (when data on the reasons for choices in outcome round was collected). Unit of observation is the reason $\times$ participant $\times$ choice. \textit{Reason was cited about outcome round choice} is an indicator variable for whether the participant cited a given reason when asked later in the survey what the reason for their choice was in the private outcome round. Column (1) includes all outcome-round choices, both choices that did and didn't include transgender workers. Column (2) only includes choices that involved a transgender worker. Possible reasons are described the notes to \autoref{fig_reasons_r1_categories}. \textit{Reason was cited in discussion} is whether during the discussion (the treatment round) the reason was cited as the main reason for choosing the final choice (for any choices). \textit{Reason was cited in discussion for transgender worker} is whether the reason was cited as the main reason for choosing the final choice (for only choices that involve a transgender worker).
%\end{tablenotes}
%\end{table}


%
%	\begin{table}[!htbp]
%\caption{How much discussion occurred?}
%\label{tab_amount_discussed}
%\resizebox{\textwidth}{!}{
%\input{../../outputs/tables/amount_discussed.tex}
%}
%\begin{tablenotes}
%			\footnotesize
%			\item  \textit{Notes}: * p $<$ 0.1, ** p $<$ 0.05, *** p $<$ 0.01. 
%			\textit{Amount discussed for each choice} was the rating given by the enumerator who observed the discussion in response to the question: "How much discussion occurred about this pair?" The unit of observation is group $\times$ choice.
%			\textit{Amount discussed overall in a group} was the rating given by the observing enumerator in response to the question: "In general, how much discussion occurred?" The unit of observation is the group. The scale for both ratings was: 0 = No discussion (e.g., only choice is made); 1 = 
%Very little discussion (e.g., one person mentions one reason); 2 = Some discussion (e.g., multiple persons mention one reason for choice); 3 = Lots of discussion (e.g., multiple persons mention multiple reasons for choice). \textit{Amount discussed for each choice} is normalized so that the it is centred around the mean for non-trans choices in the 3-person discussion, and divided by the standard deviation for those observations. \textit{Amount discussed overall} is normalized so that is centred around the mean for 3-person discussions (in phase 2), and divided by the standard deviation for this group. \input{../../outputs/stats/se_note.tex}
%		\end{tablenotes}
%\end{table}





\begin{figure}[htbp]
		\centering
		\caption{Word cloud: most relatively common words in discussions about transgender workers (excluding the word ``transgender'')}
\vspace{-5.5em}
			\includegraphics[width=0.55\linewidth]{../../outputs/figs/word_cloud.pdf}
		\label{fig_word_cloud}
		
		\vspace{-4em}
		\begin{tablenotes}
			\footnotesize
			\item  \textit{Notes}: Using transcript data from discussions, I count the relative frequency of all words used by participants. I exclude stop words such as ``a'' or ``the''. The cloud shows words for which the \textit{difference} in relative frequency when discussing transgender workers vs non-transgender workers is highest. The word ``transgender'' scores highest on this measure but is excluded.
			%This shows the location of each survey. Red dots denote surveys from phase 1. Blue dots denote surveys from phase 2. %Hollow bars represent the likelihood of a participants predicting that their neighbor selects a transgender person for a delivery. Filled bars represent the actual probability of choosing a transgender person in the outcome round, restricting to only the choice pairs for which a prediction was made. Only choice-pairs that \textit{do not} include a transgender photo are included.
		\end{tablenotes}
	\end{figure}



\begin{table}[!htbp]
\caption{No detectable heterogeneity by discussant persuasiveness or group relations}
\label{tab_het_group}
\resizebox{\textwidth}{!}{
\input{../../outputs/tables/het_group_characs.tex}
}
\begin{tablenotes}
\item \scriptsize	\textit{Notes}: \textit{High persuasiveness score for discussants}: Above median score for the other two participants in a group on an index of persuasiveness. Index is constructed using a weighted sum of the ratings out of 10 given for the following character traits of other participants: (i) confident; (ii) quiet; (iii) like a leader; (iv) shy; (v) talkative; (vi) admirable; (vii) inspiring. See Section \ref{sec_persuasiveness} for details.

\textit{Close relations with others in group}: Above median score on an index of perceived relationships with other participants in the group (see Section \ref{sec_group_relations} for full details). The index is constructed using a weighted sum of (i) whether the other participant is a close family member, (ii) another family member, (iii) a friend, or (iv) simply a neighbor; (v) how long they have known the other participant; (vi) how often they talk to the other participant; (vii) how often they ask the other participant for advice; (viii) how often they ask for recommendations for what to buy; (ix) how often they tell secrets to the other participant. For each participant, I take the mean score of their ratings for the two other participants in their group to get a score at the participant level.

\item * p $<$ 0.1, ** p $<$ 0.05, *** p $<$ 0.01.  Standard errors are clustered at the group-of-3 level and are in parentheses. Standard p-values are in brackets. Unit of observation is the participant $\times$ choice level. Sample in columns 1--2 includes only \textit{No discussion (private)} and \textit{listeners} in the \textit{2-person discussion} arm. Sample in columns 3--4 includes only \textit{No discussion (private)} and \textit{3-person discussion} arms. Only phase 2 of data collection is included (when group relationships were elicited). The outcome is whether the transgender worker was selected in the private outcome round, restricting analysis to only choices that include a transgender worker. Additional controls include stratum fixed effects; dummies for the rights videos; whether the individual was randomized into being offered 3 deliveries or 1 delivery, or was not part of this randomization; whether the alternative worker was shown on the right; phase fixed effects; relative \# items offered; relative reliability score; whether the reliability score was shown.
%\item \textit{High persuasiveness score for discussants} is defined based on the index of persuasiveness seen in Appendix \ref{sec_persuasiveness}. The variable is 1 if the average score based on the listener's responses about the two discussants is higher than the average score based on the two discussants' responses about the listener. Intuitively, this describes whether the random selection of discussants selects people who are relatively more persuasive.
%\item \textit{Close relations with others in group} is defined based on the index of the closeness of relationships between participants in a group, seen in Appendix \ref{sec_group_relations}. For each participant, I use their responses to calculate relationship scores for the other two in their group. I take the average of these two scores. The variable used is 1 if this average is greater than the sample median, and 0 otherwise.

% when the participant rated others in their group to have average score that individual based on the index defined in Appendix \ref{sec_group_relations}.
%\item Column (1): the dependent variable is the number of statements the participant agreed with on each of the lists of statements shown to them. Each participant sees both List A and List B, and the anti-trans statement ("In general, if I see a transgender person, I walk away") is randomly included in either List A or List B.
%\item Column (2): the enumerator describes two scenarios, one in which an employer rejects a transgender individual, and another in which a woman avoids a transgender individual on the street. The dependent variable is whether the participant says the person's actions are "wrong".
%\item Column (3): the dependent variable is whether the participant says a worker is likely or very likely to complete a delivery after being shown a photo of the worker.
\end{tablenotes}
\end{table}






















%
%
%\begin{figure}[!htbp]
%		
%		\centering
%		\caption{Participants correctly recall earlier choices that involve transgender workers}
%			\includegraphics[width=\linewidth]{../../outputs/figs/mem_check_plot.pdf}
%		\label{fig_mem_check}
%		\begin{tablenotes}
%			\footnotesize
%			\item  \textit{Notes}: 
%			\item Only phase 2 is included (when recall was tested).
%			\item Participants are asked approximately 5 minutes after they make their outcome round choices to recall 4 earlier choices that were made in the survey. The type of recall question depends on the treatment arm they are in. Unit of observation is the participant $\times$ choice. Only choices involving transgender workers are included.
%			\item \textit{Left panel}:  The outcome is whether the participant correctly recalled their \textit{own} choices from the treatment round.
%			\item\textit{Central panel}: 
%			Participants in the \textit{No discussion (public)} arm were told the choices that others in their group made. Later, for each choice, each participant is asked whether (i) both of the others picked choice A; (ii) both of the others picked choice B; or (iii) one other group-member picked A and one other group-member picked B. The outcome is coded as 0 if the participant says that both picked A, and in reality they both picked B (or vice versa); 1 if the picks the correct answer; and 0.5 if they get a partially correct answer (for example, if they say both picked A and in reality one picked A and the other picked B). The $y$-axis is the mean of this outcome.
%			\item \textit{Right panel}: Participants in the discussion arms (including listeners) were asked to recall the choices that were made in the discussion. The outcome is whether they correctly recall these choices.
%						%			\item \textit{Panel A}: x-axis is the probability that a given person in the group was marked as speaking positively about transgender workers for a given choice that included a transgender worker.
%%			Difference between No discussion (private) and 3-person discussion with no positive discussion at all is \input{../../outputs/stats/diff_control_anti_discuss} %%(p=\input{../../outputs/stats/diff_control_anti_discuss_p}).
%%\item \textit{Panel B}: 
%			%The outcome is whether the participants selected the comparator in the treatment round of their hiring choices.
%		\end{tablenotes}
%	\end{figure}


	


%	
%	\begin{figure}[!htbp]
%		
%		\centering
%		\caption{Recall of choices: no differences between choices involving transgender workers and choices that don't include transgender workers}
%			\includegraphics[width=\linewidth]{../../outputs/figs/mem_check_trans_vs_non.pdf}
%		\label{fig_mem_check_trans_non_trans}
%		\begin{tablenotes}
%			\footnotesize
%			\item  \textit{Notes}: 
%			\item Only phase 2 is included (when recall was tested).
%			\item Participants are asked approximately 5 minutes after they make their outcome round choices to recall 4 earlier choices that were made in the survey. The type of recall question depends on the treatment arm they are in. Unit of observation is the participant $\times$ choice. Red bars only include choices that do not involve transgender workers. Blue bars include only choices that involve transgender workers.
%			\item \textit{No discussion (private)}:  The outcome is whether the participant correctly recalled their \textit{own} choices from the treatment round.
%			\item\textit{No discussion (public)}: 
%			Participants in the \textit{No discussion (public)} arm were told the choices that others in their group made. Later, for each choice, each participant is asked whether (i) both of the others picked choice A; (ii) both of the others picked choice B; or (iii) one other group-member picked A and one other group-member picked B. The outcome is coded as 0 if the participant says that both picked A, and in reality they both picked B (or vice versa); 1 if the picks the correct answer; and 0.5 if they get a partially correct answer (for example, if they say both picked A and in reality one picked A and the other picked B). The $y$-axis is the mean of this outcome.
%			\item \textit{2-person} and \textit{3-person discussion}: Participants in the discussion arms (including listeners) were asked to recall the choices that were made in the discussion. The outcome is whether they correctly recall these choices.
%			\item $p$-value of \textit{Choice includes trans} in a regression of the outcomes that include treatment dummies is \input{../../outputs/stats/p_val_mem_check_diff_trans.tex}.
%						%			\item \textit{Panel A}: x-axis is the probability that a given person in the group was marked as speaking positively about transgender workers for a given choice that included a transgender worker.
%%			Difference between No discussion (private) and 3-person discussion with no positive discussion at all is \input{../../outputs/stats/diff_control_anti_discuss} %%(p=\input{../../outputs/stats/diff_control_anti_discuss_p}).
%%\item \textit{Panel B}: 
%			%The outcome is whether the participants selected the comparator in the treatment round of their hiring choices.
%
%		\end{tablenotes}
%	\end{figure}
	







%


%
%	\begin{table}[!htbp]
%\caption{Discussion impact is robust to controlling for different ways of calculating the social desirability score}
%\label{tab_sdb_robustness}
%\resizebox{\textwidth}{!}{
%\input{../../outputs/tables/sdb_robustness.tex}
%}
%\begin{tablenotes}
%			\footnotesize
%			\item  \textit{Notes}: In column (1), the SDB score is corrected for acquiescence bias. In column (2), I run a factor analysis on the SDB score, and use the resulting index. In column (3), I use an index calculated using inverse-covariance weights, as seen in  \citet{andersonMultipleInferenceGender2008}. More detail on the construction of the SDB scores is found in Appendix \ref{sec_data_sdb}. 
%			\item * p $<$ 0.1, ** p $<$ 0.05, *** p $<$ 0.01. Standard errors are clustered at the group-of-3 level and are in parentheses. p-values are in brackets, and use randomization inference for the \textit{3-person discussion} coefficients. Unit of observation is the participant $\times$ choice level. Sample includes the \textit{3-person discussion} arm and the \textit{No discussion (private)} arm, phase 1 only. Only choices that include a transgender worker are included. The outcome is whether the participant chose the transgender worker in the private outcome round.Additional controls in all columns include: stratum fixed effects; phase fixed effects (for columns 1 and 2 only); dummies for rights videos; and controls selected by double LASSO (see Section \ref{sec_lasso}). 
%%			\item Only choice-pairs from the outcome round that include a transgender photo are included. The dependent variable in all columns is whether the transgender person was selected. All SDB scores are based on the \citet{crowne1960marlowe} index. 
%		\end{tablenotes}
%\end{table}







		
%	\subsection{Additional main results}
	
%	\begin{figure}[!htbp]
%		
%		\centering
%		\caption{Phase 2: effect of treatments on probability of selecting transgender}
%			\includegraphics[width=\linewidth]{../../outputs/figs/main_bar_phase_2.pdf}
%		\label{fig_bar_phase_2}
%		\begin{tablenotes}
%			\footnotesize
%			\item  \textit{Notes}: %The outcome is whether the participants selected the comparator in the treatment round of their hiring choices.
%		\end{tablenotes}
%	\end{figure}
%	
%	
%	\begin{figure}[!htbp]
%		
%		\centering
%		\caption{Phase 2: effect of treatments on probability of selecting transgender}
%			\includegraphics[width=\linewidth]{../../outputs/figs/main_bar_phase_2.pdf}
%		\label{fig_bar_phase_2}
%		\begin{tablenotes}
%			\footnotesize
%			\item  \textit{Notes}: %The outcome is whether the participants selected the comparator in the treatment round of their hiring choices.
%		\end{tablenotes}
%	\end{figure}
	
	
	
	







%\begin{table}[!htbp]
%\caption{Messaging vs law - substitute / complement with discussion?}
%\label{tab_video_comp_sub}
%%\resizeboxmessaging{\textwidth}{!}{
%\input{../../outputs/tables/messaging_vs_law_interact.tex}
%%}
%\begin{tablenotes}
%\item \footnotesize	\textit{Notes}: * p $<$ 0.1, ** p $<$ 0.05, *** p $<$ 0.01. 
%\end{tablenotes}
%\end{table}





















%
%
%\begin{table}[!htbp]
%\caption{Discussion effects persist in the long-term follow-up}
%\label{tab_long_term}
%\resizebox{\textwidth}{!}{
%\input{../../outputs/tables/follow_up.tex}
%}
%\begin{tablenotes}
%%\item \footnotesize	\textit{Notes}: * p $<$ 0.1, ** p $<$ 0.05, *** p $<$ 0.01. 
%\item	\footnotesize \textit{Notes}: * p $<$ 0.1, ** p $<$ 0.05, *** p $<$ 0.01. Standard errors are clustered at the group-of-3 level and are in parentheses. Standard p-values are in brackets. Unit of observation is the participant $\times$ choice level. Sample includes the \textit{3-person discussion} arm and the \textit{No discussion (private)} arm, in both phase 1 and 2. Column (3) only includes choices that involved a transgender worker. 
%In columns (1) and (2), the outcome is whether the \textit{alternative worker} (rather than the male \textit{benchmark worker}) in the non-incentivized choices in the follow-up survey. In column (3), it is whether the transgender worker was selected. $\textit{Worker is trans} = 1$ when the alternative worker is transgender, and is 0 when the alternative worker is male or female. The specification used is seen in equation \ref{eqn_main_spec}.  Controls include stratum fixed effects; dummies for the rights videos; whether the individual was randomized into being offered 3 deliveries or 1 delivery, or was not part of this randomization; whether the alternative worker was shown on the right; phase fixed effects; and the controls selected by double LASSO (see Section \ref{sec_lasso}). In column (2), controls are interacted with \textit{Worker is trans}, so the coefficient on \textit{Worker is trans} is not shown. In the follow-up survey, workers in a pair always had the same reliability score and offered same number of items. 
%%\item Column (1): the dependent variable is the number of statements the participant agreed with on each of the lists of statements shown to them. Each participant sees both List A and List B, and the anti-trans statement ("In general, if I see a transgender person, I walk away") is randomly included in either List A or List B.
%%\item Column (2): the enumerator describes two scenarios, one in which an employer rejects a transgender individual, and another in which a woman avoids a transgender individual on the street. The dependent variable is whether the participant says the person's actions are "wrong".
%%\item Column (3): the dependent variable is whether the participant says a worker is likely or very likely to complete a delivery after being shown a photo of the worker.
%\end{tablenotes}
%\end{table}
%
%
%
%
%
%\begin{table}[!htbp]
%\caption{Video effects do not persist}
%\label{tab_long_term}
%\resizebox{\textwidth}{!}{
%\input{../../outputs/tables/follow_up_video.tex}
%}
%\begin{tablenotes}
%\item
%%\item \footnotesize	\textit{Notes}: * p $<$ 0.1, ** p $<$ 0.05, *** p $<$ 0.01. 
%%\item	\footnotesize \textit{Notes}: * p $<$ 0.1, ** p $<$ 0.05, *** p $<$ 0.01. Standard errors are clustered at the group-of-3 level and are in parentheses. Standard p-values are in brackets. Unit of observation is the participant $\times$ choice level. Sample includes the \textit{3-person discussion} arm and the \textit{No discussion (private)} arm, in both phase 1 and 2. Column (3) only includes choices that involved a transgender worker. 
%%In columns (1) and (2), the outcome is whether the \textit{alternative worker} (rather than the male \textit{benchmark worker}) in the non-incentivized choices in the follow-up survey. In column (3), it is whether the transgender worker was selected. $\textit{Worker is trans} = 1$ when the alternative worker is transgender, and is 0 when the alternative worker is male or female. The specification used is seen in equation \ref{eqn_main_spec}.  Controls include stratum fixed effects; dummies for the rights videos; whether the individual was randomized into being offered 3 deliveries or 1 delivery, or was not part of this randomization; whether the alternative worker was shown on the right; phase fixed effects; and the controls selected by double LASSO (see Section \ref{sec_lasso}). In column (2), controls are interacted with \textit{Worker is trans}, so the coefficient on \textit{Worker is trans} is not shown. In the follow-up survey, workers in a pair always had the same reliability score and offered same number of items. 
%%\item Column (1): the dependent variable is the number of statements the participant agreed with on each of the lists of statements shown to them. Each participant sees both List A and List B, and the anti-trans statement ("In general, if I see a transgender person, I walk away") is randomly included in either List A or List B.
%%\item Column (2): the enumerator describes two scenarios, one in which an employer rejects a transgender individual, and another in which a woman avoids a transgender individual on the street. The dependent variable is whether the participant says the person's actions are "wrong".
%%\item Column (3): the dependent variable is whether the participant says a worker is likely or very likely to complete a delivery after being shown a photo of the worker.
%\end{tablenotes}
%\end{table}


	
	
	
%		\begin{figure}[!htbp]
%		
%		\centering
%		\caption{No order effects on treatment effects}
%			\includegraphics[width=\linewidth]{../../outputs/figs/order_effects.pdf}
%		\label{fig_order_effects}
%		\begin{tablenotes}
%			\footnotesize
%			\item  \textit{Notes}: F-test tests for the equality of treatment effects for the discussion across all rounds using a regression that controls for stratum fixed effects, rights videos, delivery incentives, whether the alternative worker is on the right.
%			
%			%The outcome is whether the participants selected the comparator in the treatment round of their hiring choices.
%		\end{tablenotes}
%	\end{figure}
	
	
	
	
	
	
	
		
	
%	\subsection{Robustness}
	




	
	








%	\begin{figure}[!htbp]
%		
%		\centering
%		\caption{High-stakes condition}
%			\includegraphics[width=\linewidth]{../../outputs/figs/delivery_incentive.pdf}
%		\label{fig_high_stakes}
%		\begin{tablenotes}
%			\footnotesize
%			\item  \textit{Notes}: This shows the probability of selecting the alternative worker for the subsample of individuals who were either randomized into receiving 1 delivery (N = 288) or 3 deliveries (N = 294). 
%		\end{tablenotes}
%	\end{figure}










 
\clearpage


	




%\subsection{Discussion mechanisms}x



%
%	\begin{table}[!htbp]
%\caption{Selection of transgender is associated with others' behavior in discussion}
%\label{tab_r2_heterogeneity_by_discussions}
%\resizebox{\textwidth}{!}{
%\input{../../outputs/tables/r2_heterogeneity_by_discussions.tex}
%}
%\begin{tablenotes}
%			\footnotesize
%			\item  \textit{Notes}: * p $<$ 0.1, ** p $<$ 0.05, *** p $<$ 0.01. 
%%			\item Only choice-pairs from the outcome round that include a transgender photo are included. The dependent variable in all columns is whether the transgender person was selected. All SDB scores are based on the \citet{crowne1960marlowe} index. In column (1), the SDB score is corrected for acquiescence bias. In column (2), I run a factor analysis on the SDB score, and use the resulting index. In column (3), I use an index calculated using inverse-covariance weights, as seen in  \citet{andersonMultipleInferenceGender2008}. More detail on the construction of the SDB scores is found in Appendix \ref{sec_data_sdb}.
%		\end{tablenotes}
%\end{table}






	
	
	
	
	
	
	
	
	
	
%
%\begin{figure}[!htbp]
%		
%		\centering
%		\caption{Heterogeneity by narratives heard}
%			\includegraphics[width=\linewidth]{../../outputs/figs/heterogeneity_narratives.pdf}
%		\label{fig_narratives_heard}
%		\begin{tablenotes}
%			\footnotesize
%			\item  \textit{Notes}: 
%			Each coefficient is the coefficient estimate from a regression of outcome round selection on indicator variables for having heard a justification at least once in the treatment round (restricting to only choices that included a transgender person), restricting to only people who took part in or listened to a discussion.
%			%Hollow bars represent the likelihood of a participants predicting that their neighbor selects a transgender person for a delivery. Filled bars represent the actual probability of choosing a transgender person in the outcome round, restricting to only the choice pairs for which a prediction was made. Only choice-pairs that \textit{do not} include a transgender photo are included.
%		\end{tablenotes}
%	\end{figure}

\clearpage




%
%	\begin{figure}[!htbp]
%		
%		\centering
%		\caption{Evidence of asymmetric persuasion: heterogeneity by content of discussion}
%			\includegraphics[width=\linewidth]{../../outputs/figs/heterogeneity_by_r1.pdf}
%		\label{fig_het_by_positive_discussions}
%		\begin{tablenotes}
%			\footnotesize
%			\item  \textit{Notes}: 
%			\item Confidence intervals are based on regression with clustering at the group level.
%			\item \textit{Panel A}: $x$-axis is the probability that a given person in the group was marked as speaking positively about transgender workers for a given choice that included a transgender worker.
%			Difference between No discussion (private) and 3-person discussion with no positive discussion at all is \input{../../outputs/stats/diff_control_anti_discuss} %(p=\input{../../outputs/stats/diff_control_anti_discuss_p}).
%\item \textit{Panel B}: 
%			%The outcome is whether the participants selected the comparator in the treatment round of their hiring choices.
%		\end{tablenotes}
%	\end{figure}
	
	
	
	
	
	

\clearpage






\newpage


\clearpage



		

%\clearpage



\section{Experimental design: further detail}
\label{sec_appendix_exp_design}

\textbf{Item randomization}. The number of grocery items on offer was set so that within a pair, both options had an equal number of items 60\% of the time, one option had one extra item 30\% of the time, and one option had two extra items 10\% of the time.


\textbf{Reliability score}. Some choice-pairs reported the true proportion of successful deliveries from a set of timed training exercises carried out by all workers (the ``reliability score''). This reliability score incorporated exogenous variation in the perceived quality of each worker. Participants were told that this was the proportion of completed deliveries from a training exercise. Workers completed multiple training exercises with different time limits, and I randomly showed their score within one of three categories: their low score (5 or 6), their mid-value score (7 or 8), or their high score (9 or 10). See \autoref{sec_ethics} for discussion of the ethical considerations.%Participants were told that the reliability score was the proportion of completed deliveries from a training exercise. Workers completed multiple training exercises with different time limits, and I randomly showed their score within one of three categories: their low score (5 or 6), their mid-value score (7 or 8), or their high score (9 or 10). (see \autoref{sec_ethics} for discussion of the ethical considerations).

 \textbf{Other signals of quality}. In addition, for some choice-pairs, I truthfully reported (i) whether workers had 0-4 years or 5 years or more of work experience, and (ii) whether the worker spoke both Tamil and English or just Tamil. I sampled photos so that these characteristics were balanced across each worker gender.%\footnote{For experience and language, I always sampled worker photos so that these characteristics were balanced across each worker gender, to avoid participants making inferences on the "quality" of a gender over the course of the experiment.}


\textbf{Treatment-round protocols for mechanism treatments}. To make social image concerns salient, \textit{No discussion (public)} participants chose in a group setting. 
To ensure participants did not influence each other \textit{during} the elicitation process, they were told not to show others their choices and to remain silent. Participants saw others' responses only \input{../../outputs/stats/prop_choosing_only_saw.tex} of the time, and someone commented on a delivery option in only \input{../../outputs/stats/prop_choosing_only_spoke.tex} of groups. All three participants in a given group always saw the same delivery options in the treatment round, regardless of treatment status. \input{../../outputs/stats/prop_listener_silent.tex} listeners stayed silent about the choices.

\textbf{Privacy in outcome round}. The main results are robust to dropping the \input{../../outputs/stats/prop_others_could_hear_answers_rev.tex} of cases where the outcome round was overheard by neighbors (\autoref{tab_protocol_fidelity}).


%\textbf{Protocol fidelity}.  \input{../../outputs/stats/prop_listener_silent.tex} listeners stayed silent about the choices, and \textit{No discussion (public)} participants did not influence each other during the elicitation process, (only \input{../../outputs/stats/prop_choosing_only_saw.tex} saw each other's responses and \input{../../outputs/stats/prop_choosing_only_spoke.tex} of groups had someone comment on a delivery option out loud).

 
\textbf{Video attention and comprehension}. To ensure participants could hear the video and were concentrating fully, participants always watched the video alone using headphones, rather than in a group. All participants in a group-of-3 watched the same video, but were not told explicitly that others had seen the same video. After watching the video, they were asked comprehension questions about the content (and were corrected if they did not answer correctly), and then read the script of the video text again for 2 minutes.



\begin{table}[!htbp]
\caption{Robustness to protocol fidelity}
\label{tab_protocol_fidelity}
\resizebox{\textwidth}{!}{
\input{../../outputs/tables/protocol_fidelity.tex}
	}
	\begin{tablenotes}

\item \footnotesize	\textit{Notes}:  Sample in column 1 includes the \textit{3-person discussion} arm and the \textit{No discussion (private)} arm, in both phase 1 and 2, but excludes cases where the respondent said that others could hear their private outcome-round responses. Column 2 is the phase 2 sample, but excludes the cases when the listener spoke during the 2-person discussion. Column 3 is the phase 2 sample, but excludes cases when any of the \textit{No discussion (public)} participants spoke during the treatment round, which was supposed to be silent. * p $<$ 0.1, ** p $<$ 0.05, *** p $<$ 0.01. Standard errors are clustered at the group-of-3 level and are in parentheses. Standard p-values are in brackets. Unit of observation is the participant $\times$ choice level.  Only choices involving a transgender worker are included. The dependent variable is whether the transgender worker was selected in the private outcome round choices. 
 Controls include stratum fixed effects; dummies for the rights videos; whether the individual was randomized into being offered 3 deliveries or 1 delivery, or was not part of this randomization; whether the alternative worker was shown on the right; phase fixed effects; relative reliability score; relative items offered; whether the reliability score was shown; and the controls selected by double LASSO (see Section \ref{sec_lasso}).
\end{tablenotes}
\end{table}




\clearpage



\section{Discussion design details}
\label{sec_discussion_design}


To encourage people to speak up in the  discussions, the surveyor leading the discussion asked icebreaker questions before the treatment round started.
%	
%	, including about which celebrity they would prefer to deliver to their home and their opinion on the working conditions for delivery workers. 
	In the \textit{No discussion (public)} arm, participants sat together in a group and also took part in this icebreaker activity. \textit{No discussion (private)} participants were asked the same icebreaker questions, but individually and in private.
	
To encourage discussion about a number of different characteristics in the treatment round, 2 out of the 4 choice-pairs in the treatment round included information about experience and language for both workers, and all choice-pairs included the reliability score for both workers.

The enumerator who led the discussion was told to prompt participants to speak using neutral questions that did not lead the participants to prefer one option or the other (for example, ``What are the differences between A and B?''). They were also explicitly told never to use the word ``transgender'' themselves, in order to avoid revealing the purpose of the experiment to the participants.

For the 2-person discussion, the enumerator leading the discussion also asked the listener if he or she heard the choice being made by the speakers, along with the reason given by the speakers. If the listener did not hear, the speakers were asked to repeat themselves.

The discussion script used by the enumerator leading the discussion is below (Section  \ref{sec_discussion_script}). 


\subsection{Written discussion script for facilitator}
\label{sec_discussion_script}

\vspace{-2em}
	\hspace{-2em} \includegraphics[scale=0.82,page=1]{../../survey_materials/discussion_guide.pdf}




\clearpage



\section{Ethical considerations}
\setcounter{table}{0}
\setcounter{figure}{0}
\label{sec_ethics}


%\textcolor{red}{CUT THIS TO 1 page!!}

%Understanding how to reduce discrimination towards the transgender community in India is of social importance.
The study aimed to obtain a revealed-preference measure of discrimination based on real-stakes choices. Using real rather than hypothetical choices was crucial for reducing concerns about social desirability bias and experimenter demand effects, and therefore for understanding methods for reducing such discrimination. However, this approach meant balancing multiple ethical considerations---primarily avoiding explicit deception of respondents while also protecting transgender workers from harm.

%There is an important concern that the protocol may be seen as deceptive for the participants because they were unlikely to receive a delivery from a transgender worker. This concern, nevertheless, had to be balanced against the risks that transgender workers would have faced if they had carried out deliveries. If transgender workers were to visit the homes of participants, they could have been vulnerable to stigma and abuse. The randomization was designed to avoid this as much as possible while also truthfully telling participants that they could receive a delivery from any of the workers they chose. For the few transgender workers who actually carried out a delivery, the worker was accompanied by a full team of 2-3 enumerators throughout the entire process. Interaction between the transgender worker and participant was reduced to a minimum, and the other enumerators were trained to avoid conflict and protect the worker. This design protected transgender workers as much as possible while also not deceiving respondents and allowing truthful revealed preference responses.

An important concern is that participants might view the protocol as deceptive because they were unlikely to receive a delivery from a transgender worker. This concern had to be weighed against the risks faced by transgender workers performing deliveries. These workers could be exposed to stigma and abuse when visiting participants' homes. The randomization aimed to minimize this risk while truthfully telling participants they could receive a delivery from any chosen worker. For the few transgender workers who performed deliveries, each was accompanied by 2-3 enumerators. Interaction was minimized, and these enumerators were trained to avoid conflict and protect the worker. This design protected transgender workers while maintaining truthful revealed preference responses.

%A second concern is that while the reliability score  shown on some of the worker profiles was truthful, participants were not given enough context to interpret it correctly. Using the reliability score was important for examining whether discrimination against transgender workers in this context was \textit{statistical}. If discrimination was reduced when the score was displayed, this would indicate that statistical discrimination contributed to total discrimination. This distinction has important policy implications:  it may be easier to reduce statistical discrimination by informing about transgender workers' productivity, whereas reducing taste-based discrimination requires changing attitudes or leveraging social pressure.

A second concern is that participants lacked sufficient context to properly interpret the reliability score shown on some worker profiles, though the score itself was truthful. Using the reliability score was important for examining whether discrimination against transgender workers was statistical. The reliability score truthfully reported how many deliveries a worker successfully completed in a training exercise. However, participants were unaware that workers undertook multiple training exercises of different durations, yielding different scores. Participants were told (e.g.) ``8/10 means that out of 10 deliveries they had to make in a training exercise, 8 times they successfully delivered.'' All information provided was true, without additional false details about the exercise's nature: they weren't told that the exercise was conducted only once. This mimics real-world situations, where employers often have incomplete information about job candidates.

Finally, participants' primary concern about reliability was ensuring delivery receipt. In practice, we completed deliveries to \input{../../outputs/stats/attrition_perc.tex} of them. Participants were therefore always selecting an extremely reliable delivery worker, so any misleading inference from the reliability score did not materially affect them.

% The reliability score was a truthful report of how many deliveries a worker successfully carried out in a training exercise. However, participants were unaware that workers carried out multiple training exercises with different durations, and that these yielded different scores. Participants were told (e.g.) ``\textit{8/10 means that out of 10 deliveries they had to make in a training exercise, 8 times they successfully delivered.}'' Importantly, everything they were told was true, and they were not given any additional false detail about the nature of the exercise: for example, they were not told that the exercise was only done once. This mimics real-world situations, in which employers often have incomplete sources of information about job candidates. 
 
% Finally, the primary reason participants cared about reliability was to ensure that they would actually receive a delivery. In practice, we completed deliveries to \input{../../outputs/stats/attrition_perc.tex} of the participants. Participants were therefore always in fact choosing a delivery worker who was extremely reliable, so any misleading inference from the reliability score did not cost them materially. 
 
 
% We had no reports that participants had asked more about the nature of the training exercise, indicating that they did not deem this information relevant for interpreting the score.
 
 
% NO HARM DONE - 
 
 
% Moreover, the score was an objective performance indicator, rather than a subjective evaluation that 
 
 
 
 
% In real-world situations, employers often have incomplete information about job applicants 









%However, participants were led to believe that 


%told explicitly that it was a training exercise

%\clearpage


\clearpage




	\section{Video scripts}
	\label{sec_video_scripts}
	
	\vspace{-5em}
	\hspace{-5em} \includegraphics[scale=0.85,page=1]{../../survey_materials/video_scripts.pdf}
	
	\newpage
	
	\vspace{-5em}
	\hspace{-5em} \includegraphics[scale=0.85,page=2]{../../survey_materials/video_scripts.pdf}




\section{Pre-analysis plan}
\label{sec_preanalysis_plan}

	\setcounter{table}{0}
\setcounter{figure}{0}



The study was pre-registered in the AEA registry under the ID \# AEARCTR-0010953. Two pre-analysis plans were uploaded: the first in March 2023, corresponding to the start of phase 1, and the second in May 2023, corresponding to the start of phase 2. In phase 1, I faced data quality issues and unexpectedly low survey productivity in the first 2 days of data collection. This, along with a tight budget, meant that I decided to cut the sample size and the survey length, resulting in design changes relative to the phase 1 pre-analysis plan. As noted in the main text, phase 2 of data collection was added to the design upon the receipt of additional funding in the course of the experiment, resulting in the updated pre-analysis plan.  I outline all the deviations from the pre-analysis plan, along with their justifications, below.

\subsection{Phase 1}
\begin{itemize}
\itemsep-0.5em
  \item \textit{Mixed-video arm}. In phase 1, I had planned to include 450 individuals in a ``mixed-video'' arm. Because of budget constraints and low productivity, I decided at the start of phase 1 to remove this treatment condition, reducing the planned sample size. Because of this, I also dropped the plan to analyze spillover effects between individuals in a group.
  \item \textit{High-stakes condition}. In phase 1, I had planned to randomize half of every treatment group into the ``high-stakes condition'' (i.e., for them to receive 3 deliveries instead of 1). However, because this tripled the expenditures on grocery items, I decided to restrict the randomization to only a subsample of approximately 200 groups, half of whom were allocated to the high-stakes condition.
  \item \textit{Attitude questions}. Participants' understanding of the measure of attitudes (``Disapproval of discrimination'') that I had planned to use appeared to be poor, so I replaced it with a simpler self-reported attitude question.
  \item \textit{Other mechanism questions}. In order to reduce the length of the survey, I also dropped some secondary mechanism measures, including: (i) an implicit association test; (ii) whether discrimination can lead to legal consequences; (iii) the perceived similarity index; (iv) some controls, including the number of children in a household, smartphone ownership, a measure of willingness to persuade in discussions, and a proxy for baseline progressive social attitudes.
  \item \textit{Restriction to urban}. In order to reduce transport costs, I carried out surveys only in urban areas, instead of both rural and urban areas.
\end{itemize}

\subsection{Updates for phase 2}
%In phase 2, I made the following changes to the design:
\begin{itemize}
\itemsep-0.5em
  \item The \textit{No discussion (public)} and \textit{2-person discussion} arms were added.
  \item \textit{Additional mechanism outcomes}. To allow for further analysis of the mechanisms behind the group discussion, I added measures of (i) relationships between group members, (ii) persuasiveness of group members, (iii) private grocery pick-up choices, and (iv) memory checks (i.e., how well do participants remember their own and others' choices).
 \item \textit{Removed mechanism outcomes}. To avoid the survey becoming too long, I removed the measure of salience and the measure of social desirability score for phase 2.
\end{itemize}

\subsection{Other changes}

\begin{itemize}
\itemsep-0.5em
  \item \textit{Delivery time}. I originally planned to carry out follow-up surveys and deliveries in parallel to the main surveys. However, it became clear that this was logistically infeasible, so I instead chose to carry out all deliveries at the end of each phase. This meant that the delivery time was 2--9 weeks, instead of the pre-specified 1 week.
\item \textit{Discussion recordings}. I planned to use discussion transcripts to encode a ``Probability of endorsing'' variable, separately for each individual. However, it became clear that it was not feasible for enumerators to accurately attribute each statement to a specific participant, so I do not include this for analysis.
\end{itemize}


\subsection{Pre-specified analyses}

Here I describe analyses that I specified in the pre-analysis plan, but which are not presented as main results in the text.
\begin{itemize}
\itemsep-0.5em
  \item \textit{Video and discussion interactions}. The fully interacted specification that includes all video arms and the 3-person discussion arm variation was pre-specified and is shown in \autoref{tab_video_interactions}.
  \item \textit{Pooled phase 2 results}. In the phase 2 pre-analysis plan, I described that I would pool some treatment arms (see Figure 1 in the pre-analysis plan). As prespecified, I pooled the 2-person discussion and 3-person discussion participants when analyzing the treatment round (\autoref{tab_mech_r1_r2}). However, for reader clarity in the main text I did not pool any treatment arms when presenting the phase 2 outcome round results (\autoref{tab_mech_r1_r2}). The corresponding pooled results are presented in \autoref{tab_main_phase_2_pooled}.
  \item \textit{Heterogeneity with respect to round 1 observations}. \autoref{tab_effect_of_announce} shows the heterogeneous effects of \textit{observers}' choices with respect to the round 1 choices they observed. \autoref{tab_effect_of_listening} shows the heterogeneous effects of \textit{listeners}' choices with respect to the round 1 choices they listened to.
  \item  \textit{Heterogeneity with respect to group composition}. \autoref{tab_het_group} shows heterogeneous effects of the discussion with respect to persuasiveness and group relations. I find no detectable heterogeneity.
\end{itemize}



\begin{table}[!htbp]
	\caption{Effect of phase 2 treatments on private choices in outcome round (pooled)}
	\label{tab_main_phase_2_pooled}
	\centering
	\resizebox{\textwidth}{!}{
		\input{../../outputs/tables/main_phase_2_pooled.tex}
	}
	\begin{tablenotes}
%		\item \footnotesize	\textit{Notes}: * p $<$ 0.1, ** p $<$ 0.05, *** p $<$ 0.01. 
%		\item Controls include items and quality stuff
%		\item 
		
		\item	\scriptsize \textit{Notes}: In this specification, I pool the \textit{No discussion (private)} and the \textit{No discussion, public (non-observers)}. They are the omitted category. I also pool \textit{2-person discussion (speakers)} and \textit{3-person discussion} participants, calling them \textit{Discussion (pooled)}. * p $<$ 0.1, ** p $<$ 0.05, *** p $<$ 0.01. Standard errors are clustered at the group-of-3 level and are in parentheses. p-values are in brackets. Unit of observation is the participant $\times$ choice level. Sample includes all treatment arms in phase 2 of data collection. Column (3) only includes choices that involved a transgender worker. 
In columns (1) and (2), the outcome is whether the \textit{alternative worker} (rather than the male \textit{benchmark worker}) was selected in the private choices in the \textit{outcome round}. In column (3), it is whether the transgender worker was selected. $\textit{Worker is trans} = 1$ when the alternative worker is transgender, and is 0 when the alternative worker is male or female. 
%The mean of the dependent variable when the worker is trans and in the \textit{No discussion (private)} arm indicates that the transgender worker was selected (rather than the male benchmark worker) 40\% of the time. The mean when the worker is male or female in the \textit{No discussion (private)} arm is above 50\% because participants on average prefer female alternative workers to the male benchmark workers. 
The specification used is seen in equation \ref{eqn_main_spec}.  
Controls include stratum fixed effects; dummies for the rights videos;  whether the alternative worker was shown on the right; and the controls selected by double LASSO (see Section \ref{sec_lasso}). In column (2), controls are interacted with \textit{Worker is trans}, so the coefficient on \textit{Worker is trans} is not shown. Columns (2) and (3) also include controls for the relative \# items offered by the alternative worker, the relative reliability score of the worker, and a dummy for whether the reliability score was shown. Randomization inference \textit{p}-values at the base of the table test for differences between treatment effects across treatment arms, i.e., for differences in the interacted terms in columns (1) and (2), and differences in the uninteracted terms in column (3).
		%\item Column (1): the dependent variable is the number of statements the participant agreed with on each of the lists of statements shown to them. Each participant sees both List A and List B, and the anti-trans statement ("In general, if I see a transgender person, I walk away") is randomly included in either List A or List B.
		%\item Column (2): the enumerator describes two scenarios, one in which an employer rejects a transgender individual, and another in which a woman avoids a transgender individual on the street. The dependent variable is whether the participant says the person's actions are "wrong".
		%\item Column (3): the dependent variable is whether the participant says a worker is likely or very likely to complete a delivery after being shown a photo of the worker.
	\end{tablenotes}
\end{table}


\begin{landscape}
\begin{table}[!htbp]
\caption{Effect of observing others' choices}
\label{tab_effect_of_announce}
\centering
\resizebox{1.55\textwidth}{!}{
\input{../../outputs/tables/effect_of_announce.tex}
}
\begin{tablenotes}
			\footnotesize
			\item  \textit{Notes}: * p $<$ 0.1, ** p $<$ 0.05, *** p $<$ 0.01. Standard errors are clustered at the group-of-3 level and are in parentheses. $p$-values are in brackets; for coefficients involving randomized treatments, the $p$-values are calculated using randomization inference.
	Unit of observation is the participant $\times$ choice level. The outcome in all columns is whether the participant chose a transgender worker in the private outcome round, restricting analysis to only choices involving a transgender worker. Sample only includes phase 2 of data collection. Column 1 only includes the \textit{No discussion (private)} arm. Column 2 only includes the \textit{Non-observers} from the \textit{No discussion (public)} arm, who knew they were choosing publicly in the treatment round but did not observe others' choices before making outcome round choices. Column 3 only includes the \textit{Observers} from the \textit{No discussion (public)} arm, who were told others' choices before making their outcome round choices. Columns 4-6 include combinations of each of these treatment conditions. \textit{P(others in group selected trans in treatment round)} ($\pi_{-i}$) is the proportion of times (out of a maximum of 4) that the other two participants in the group selected a transgender worker in the treatment round. \textit{P(selected trans in treatment round)} is the proportion of times (out of a maximum of 2) that the participant herself selected a transgender worker in the treatment round.  Controls include stratum fixed effects; dummies for the rights videos; whether the alternative worker was shown on the right; relative \# items offered by the transgender worker; relative reliability score; and a dummy for whether the reliability score was shown.  
%			Evidence of asymmetric persuasion? RI p-value on the mean predicted effect of \textit{No discussion, public, observer} when evaluated at $ P(\text{others in group selected trans in treatment round}) = 0.5 $ is p=\input{../../outputs/stats/effect_of_announce_p05}.
%			\item Only choice-pairs from the outcome round that include a transgender photo are included. The dependent variable in all columns is whether the transgender person was selected. All SDB scores are based on the \citet{crowne1960marlowe} index. In column (1), the SDB score is corrected for acquiescence bias. In column (2), I run a factor analysis on the SDB score, and use the resulting index. In column (3), I use an index calculated using inverse-covariance weights, as seen in  \citet{andersonMultipleInferenceGender2008}. More detail on the construction of the SDB scores is found in Appendix \ref{sec_data_sdb}.
		\end{tablenotes}
\end{table}
\end{landscape}

\begin{landscape}
\begin{table}[!htbp]
\caption{Effect of listening to a discussion that selected transgender workers}
\label{tab_effect_of_listening}
\centering
\resizebox{1.55\textwidth}{!}{
\input{../../outputs/tables/effect_of_listening.tex}
}
\begin{tablenotes}
			\scriptsize
			\item  \textit{Notes}: * p $<$ 0.1, ** p $<$ 0.05, *** p $<$ 0.01. Standard errors are clustered at the group-of-3 level and are in parentheses. $p$-values are in brackets. For coefficients involving randomized treatments, they are calculated using randomization inference.
	Unit of observation is the participant $\times$ choice level. The outcome in all columns is whether the participant chose a transgender worker in the private outcome round, restricting analysis to only choices involving a transgender worker. Sample only includes phase 2 of data collection. Columns 1-2 only include the \textit{No discussion (private)} arm. Columns 3-4 only include the \textit{No discussion (private)} arm and the \textit{Listeners} who watched and listened to the 2-person discussion. Columns 5-6 include only the Listeners and the \textit{Non-observers}, who knew their choices in the treatment round would be public, but who weren't told the choices of others before making their outcome round choices. Columns 7-8 include the No-discussion (private) arm, the Non-observers, and the Listeners. \textit{P(others in group selected trans in treatment round)} ($\pi_{-i}$) is the proportion of times that the other two participants in the group selected a transgender worker in the treatment round. In the case of listeners, this is out of a maximum of 2 (since the others in their group, the speakers, make two joint choices for the choices involving transgender workers). In the case of the no-discussion (private) arm and the non-observers, it is out of a maximum of 4, since other participants can make different choices. \textit{Other controls} include stratum fixed effects; dummies for the rights videos; whether the alternative worker was shown on the right; relative \# items offered by the transgender worker; relative reliability score; and a dummy for whether the reliability score was shown.  \textit{LASSO controls} are those selected by double LASSO (see Section~\ref{sec_lasso}).
%			Evidence of asymmetric persuasion? RI p-value on the mean predicted effect of \textit{No discussion, public, observer} when evaluated at $ P(\text{others in group selected trans in treatment round}) = 0.5 $ is p=\input{../../outputs/stats/effect_of_announce_p05}.
%			\item Only choice-pairs from the outcome round that include a transgender photo are included. The dependent variable in all columns is whether the transgender person was selected. All SDB scores are based on the \citet{crowne1960marlowe} index. In column (1), the SDB score is corrected for acquiescence bias. In column (2), I run a factor analysis on the SDB score, and use the resulting index. In column (3), I use an index calculated using inverse-covariance weights, as seen in  \citet{andersonMultipleInferenceGender2008}. More detail on the construction of the SDB scores is found in Appendix \ref{sec_data_sdb}.
		\end{tablenotes}
\end{table}
\end{landscape}
%
%\begin{sidewaystable}[!htbp]
%\caption{Effect of listening to a discussion that selects transgender workers (probit)}
%\label{tab_effect_of_listening_probit}
%\resizebox{0.9\textwidth}{!}{
%\input{../../outputs/tables/listener_probit.tex}
%}
%\begin{tablenotes}
%			\footnotesize
%			\item  \textit{Notes}: Coefficients reported are the probit coefficients (not the average marginal effects).   * p $<$ 0.1, ** p $<$ 0.05, *** p $<$ 0.01. Standard errors are clustered at the group-of-3 level and are in parentheses. $p$-values are in brackets.
%	Unit of observation is the participant $\times$ choice level. The outcome in all columns is whether the participant chose a transgender worker in the private outcome round, restricting analysis to only choices involving a transgender worker. Sample only includes phase 2 of data collection. Columns 1-2 only include the \textit{No discussion (private)} arm. Columns 3-4 only includes the \textit{No discussion (private)} arm and the \textit{Listeners} who watched and listened to the 2-person discussion. Columns 5-6 include only the Listeners and the \textit{Non-observers}, who knew their choices in the treatment round would be public, but who weren't told the choices of others before making their outcome round choices. Columns 7-8 include the No-discussion (private) arm, the Non-observers, and the Listeners. \textit{P(others in group selected trans in treatment round)} ($\pi_{-i}$) is the proportion of times that the other two participants in the group selected a transgender worker in the treatment round. In the case of listeners, this is out of a maximum of 2 (since the others in their group, the speakers, are make two joint choices for the choices involving transgender workers). In the case of the no-discussion (private) and non-observers, it is out of a maximum of 4, since other participants can make different choices.
%			
%%			Evidence of asymmetric persuasion? RI p-value on the mean predicted effect of \textit{No discussion, public, observer} when evaluated at $ P(\text{others in group selected trans in treatment round}) = 0.5 $ is p=\input{../../outputs/stats/effect_of_announce_p05}.
%%			\item Only choice-pairs from the outcome round that include a transgender photo are included. The dependent variable in all columns is whether the transgender person was selected. All SDB scores are based on the \citet{crowne1960marlowe} index. In column (1), the SDB score is corrected for acquiescence bias. In column (2), I run a factor analysis on the SDB score, and use the resulting index. In column (3), I use an index calculated using inverse-covariance weights, as seen in  \citet{andersonMultipleInferenceGender2008}. More detail on the construction of the SDB scores is found in Appendix \ref{sec_data_sdb}.
%		\end{tablenotes}
%\end{sidewaystable}


%
%\begin{figure}[!htbp]
%		
%		\centering
%		\caption{Predictions about others in group, for pairs not involving a transgender worker}
%			\includegraphics[width=0.7\linewidth]{../../outputs/figs/group_predic_non_trans.pdf}
%		\label{fig_group_predic_non_trans}
%		\begin{tablenotes}
%			\footnotesize
%			\item  \textit{Notes}: 	Sample includes all participants in the \textit{3-person discussion} arm and the \textit{No discussion (private)} arm, in both phases.  Unit of observation is participant $\times$ prediction. Only choices that do \textit{not} include a transgender photo are included. Hollow bars represent the probability that a participant predicts that their group-member selects a transgender delivery worker. The prediction was incentivized. Each participant made 2 predictions (one involving a transgender worker) for each of their 2 group members. The two predictions involving a transgender worker are included for analysis. Filled bars represent the actual probability that participants select a transgender worker in the outcome round (restricting to only choices for which another group member made a prediction).		\item 
%			%			\item \textit{Panel A}: x-axis is the probability that a given person in the group was marked as speaking positively about transgender workers for a given choice that included a transgender worker.
%%			Difference between No discussion (private) and 3-person discussion with no positive discussion at all is \input{../../outputs/stats/diff_control_anti_discuss} %%(p=\input{../../outputs/stats/diff_control_anti_discuss_p}).
%%\item \textit{Panel B}: 
%			%The outcome is whether the participants selected the comparator in the treatment round of their hiring choices.
%		\end{tablenotes}
%	\end{figure}

	

%\subsection{Phase 2 mechanism results}

	
		
			





%	
%\begin{table}[!htbp]
%	\caption{Phase 2 - round 1 choices}
%	\label{tab_r1_phase_2}
%	\resizebox{\textwidth}{!}{
%		\input{../../outputs/tables/r1_phase_2.tex}
%	}
%	\begin{tablenotes}
%		\item \footnotesize	\textit{Notes}: * p $<$ 0.1, ** p $<$ 0.05, *** p $<$ 0.01. 
%		%\item Column (1): the dependent variable is the number of statements the participant agreed with on each of the lists of statements shown to them. Each participant sees both List A and List B, and the anti-trans statement ("In general, if I see a transgender person, I walk away") is randomly included in either List A or List B.
%		%\item Column (2): the enumerator describes two scenarios, one in which an employer rejects a transgender individual, and another in which a woman avoids a transgender individual on the street. The dependent variable is whether the participant says the person's actions are "wrong".
%		%\item Column (3): the dependent variable is whether the participant says a worker is likely or very likely to complete a delivery after being shown a photo of the worker.
%	\end{tablenotes}
%\end{table}
%	
	
	
	
	
%	\begin{table}[htbp]
%	\centering
%	\caption{Treatment round choices}
%	\centering
%\footnotesize \textbf{Panel A:} 3-person discussion sample (Phases 1 and 2)	
%\vspace{-1em}
%
%	\normalsize
%		\begin{subtable}[t]{1\textwidth}
%\label{tab_attitudes}
%\centering
%\resizebox{0.8\textwidth}{!}{
%\input{../../outputs/tables/r1_main.tex}
%}
%		\end{subtable}
%	\vspace{1em}
%	
%		\centering
%\footnotesize \textbf{Panel B:} Phase 2 Sample (Listeners excluded)
%\vspace{-1em}
%
%	\normalsize
%		\begin{subtable}[t]{0.8\textwidth}
%	\resizebox{\textwidth}{!}{
%		\input{../../outputs/tables/r1_phase_2.tex}
%	}
%		\end{subtable}
%	\footnotesize
%%\flushright \textit{Continued on next page...}
%\end{table}
%	
	
	


%\subsection{Videos}




%	
%	\begin{table}[!htbp]
%\caption{Effect of videos - all phases, all discussion arms}
%\label{tab_videos}
%\resizebox{\textwidth}{!}{
%\input{../../outputs/tables/videos.tex}
%}
%\begin{tablenotes}
%\item \footnotesize	\textit{Notes}: * p $<$ 0.1, ** p $<$ 0.05, *** p $<$ 0.01. 
%%\item Column (1): the dependent variable is the number of statements the participant agreed with on each of the lists of statements shown to them. Each participant sees both List A and List B, and the anti-trans statement ("In general, if I see a transgender person, I walk away") is randomly included in either List A or List B.
%%\item Column (2): the enumerator describes two scenarios, one in which an employer rejects a transgender individual, and another in which a woman avoids a transgender individual on the street. The dependent variable is whether the participant says the person's actions are "wrong".
%%\item Column (3): the dependent variable is whether the participant says a worker is likely or very likely to complete a delivery after being shown a photo of the worker.
%\end{tablenotes}
%\end{table}





%\begin{table}[!htbp]
%\caption{Video interactions on norms}
%\label{tab_video_interactions_norms}
%\resizebox{\textwidth}{!}{
%\input{../../outputs/tables/video_interactions_norms.tex}
%}
%\begin{tablenotes}
%\item \footnotesize	\textit{Notes}: * p $<$ 0.1, ** p $<$ 0.05, *** p $<$ 0.01. 
%%\item Column (1): the dependent variable is the number of statements the participant agreed with on each of the lists of statements shown to them. Each participant sees both List A and List B, and the anti-trans statement ("In general, if I see a transgender person, I walk away") is randomly included in either List A or List B.
%%\item Column (2): the enumerator describes two scenarios, one in which an employer rejects a transgender individual, and another in which a woman avoids a transgender individual on the street. The dependent variable is whether the participant says the person's actions are "wrong".
%%\item Column (3): the dependent variable is whether the participant says a worker is likely or very likely to complete a delivery after being shown a photo of the worker.
%\end{tablenotes}
%\end{table}










%\subsection{Balance}







%\subsection{Heterogeneity}









	
	
		
	
	
	
%	
%	\begin{figure}[!htbp]
%		
%		\centering
%		\caption{Treatment round choices - phase 2}
%			\includegraphics[width=\linewidth]{../../outputs/figs/r1_bar_phase2.pdf}
%		\label{fig_r1_phase2}
%		\begin{tablenotes}
%			\footnotesize
%			\item  \textit{Notes}: %Hollow bars represent the likelihood of a participants predicting that their neighbor selects a transgender person for a delivery. Filled bars represent the actual probability of choosing a transgender person in the outcome round, restricting to only the choice pairs for which a prediction was made. Only choice-pairs that \textit{do not} include a transgender photo are included.
%		\end{tablenotes}
%	\end{figure}


%\input{../../outputs/stats/baseline_discrim.tex}









%






\clearpage



\section{Data and measurement}
\label{sec_data}
\setcounter{table}{0}
\setcounter{figure}{0}
%\linespread{5}

\subsection{Predicted choices (community)}


Participants first made incentivized predictions about the choices of others in the study whom they did not know. They were shown 3 pairs of delivery options and were truthfully informed that 20 other people in the study, from the participants' area, had been shown those same pairs. They had to predict how many of those 20 picked each option. If participants made the closest guess, on average, across all 3 pairs, they were entered into a lottery to win 3000 Rs.' worth of additional items. Two of the 3 pairs were male-to-male comparisons, and 1 pair compared a male and a transgender person.
%
%
%
%Participants were told that if they made the best guess out of everyone in the study, they would be entered into a lottery with a prize worth Rs. 3000. This description of the elicitation was used in order to make the instructions as simple as possible for the participants. 
%
A randomly selected half of the participants were always asked how many picked the option on the left, and half were asked how many picked the option on the right. The phrasing used was: ``\textit{In your opinion, how many people out of 20 chose the person on the [right/left]?}'' The transgender option always appeared on the side being asked about.



\subsection{Predicted choices (own group)}

Participants made incentivized predictions of the private hiring choices of the other two people in their group. 
%
For each of the other two group members, they were asked to predict which option they chose for two pairs of delivery options. For each other person, one choice-pair compared a male and a male, and another compared a male and a transgender person. If they correctly guessed all 4 combinations (2 predictions for each of the 2 group members), they were entered into a second lottery to win a separate prize, also worth 3000 Rs. When participants were making their main hiring choices, they did not know that their neighbors would later be paid for predicting their answers. This rules out concerns that they tried to make their hiring choices more predictable in order to help out their neighbors.

\subsection{Social desirability score}
\label{sec_data_sdb}

To measure the social desirability score of each participant in phase 1, I use an adapted version of the \citet{crowne1960marlowe} index that includes the following questions:
\begin{enumerate}
\itemsep-1em
  \item I sometimes feel annoyed at people when I don’t get what I want.
\item No matter who I'm talking to, I'm always a good listener. (\textit{reverse coded})
\item I sometimes try to take revenge instead of forgiving and forgetting.
\item I am always polite, even to people who are not nice. (\textit{reverse coded})
\item There have been times when I was jealous of other people’s luck.
\item I am sometimes annoyed when people ask me for favors.
\item I have deliberately said something that hurt someone’s feelings.
\end{enumerate}
This subset of questions was selected based on an exploratory factor analysis of pilot data. I calculate an individual's social desirability score by summing the number of socially desirable answers they give (that is, disagreeing with questions 1, 3, 5, 6, and 7, or agreeing with questions 2 and 4). This social desirability score is used in \autoref{tab_confound_interact}.
%In \autoref{tab_sdb_robustness}, I use three alternative specifications for the calculation of the social desirability score. 
%First, I correct the score for acquiescence bias, or the tendency to agree with whatever question is being asked. This correction is common in the psychometric literature and has been shown to substantially improve the reliability of psychometric constructs, including in developing country contexts  \citep{sotoDevelopmentalPsychometricsBig2008, rammstedtImpactAcquiescenceEvaluation2013, laajajMeasuringSkillsDeveloping2019}. 
%To make this correction, I take the following steps:
%\begin{enumerate}
%  \item Reverse the reverse-coded items.
%  \item Take the average of all positively-coded items for each individual $i$.
%  \item Subtract this from the average of the reverse-coded items for the same individual $i$.
%  \item Divide this by two to get the acquiescence score $AS_i$ for individual $i$.
%  \item Correct individual $i$'s raw scores by adding $AS_i$ to every reverse-coded item, and subtracting $AS_i$ from every positively-coded item.
%\end{enumerate}
%
%Second, I calculate a social desirability score based on weights from a factor analysis that assumes a single factor. The loadings for each of the 7 variables are: \input{../../outputs/stats/sdb_loadings}. Following the psychometric literature (e.g., \citealp{rammstedtImpactAcquiescenceEvaluation2013}), I remove measures with a loading less than 0.3, and weight the remaining measures with the factor loading.



\subsection{Salience}
\label{sec_data_salience}

I examine how salient the idea of transgender people is for each participant. I use a test of salience based on the one seen in \citet{johnCanSimplePsychological2022}. Participants were read two lists containing a mix of words mostly related to deliveries, everyday objects, and identity. The first list contained the words: \textit{Delivery, Dal, Tamil, Bucket, Sambar, Man, Water, App}, and \textit{Insurance}. The second list contained the words \textit{Idly, Pot, Bike, Hindu, Hospital, Transgender, Butter, President}, and \textit{Peas}. The lists were read out in the same order to every participant. After each list was read out once by the enumerator, participants were asked to repeat as many words as they could from the list. The enumerators were instructed to not repeat the options. To incentivize performance in the game, participants were truthfully told that if they recalled the most words of all the people in the study, they would be entered into a lottery with a prize worth Rs. 3000.

The measure of salience was whether they recalled the word ``transgender'', conditional on the total number of other words they recalled. In the \textit{No discussion (private)} arm, people remembered the word ``transgender'' \input{../../outputs/stats/salience_trans} of the time, and on average remembered other words \input{../../outputs/stats/salience_non_trans} of the time.
There is a significant correlation ($p=0.04$) between participants' recollection of the term ``transgender'' and their selection of a transgender individual in the outcome hiring round. This suggests that the salience measure is successfully capturing a signal that is relevant to hiring decisions.

\subsection{List experiment}
\label{sec_list_exp}

To measure negative attitudes towards transgender people, I use a double list experiment \citep{droitcour2004item, glynnWhatCanWe2013}. In this method, participants are shown two lists of statements (list A and list B), and are asked how many statements from each list they agree with. They are not asked \textit{which} statements they agree with, so neither the surveyor nor the researcher can determine whether they agreed with a particularly sensitive statement in the list. List A and B each contain 5 non-sensitive statements. For each participant, either list A or list B is randomly selected to include one additional statement: ``In general, if I see a transgender person, I walk away.'' Enumerators read out each list and asked the participant how many statements they agreed with. Whether list A or list B was read first was also randomized. Using two lists has the advantage of enabling a validation check of the treatment effect estimates \citep{chuangSexLiesMeasurement2021}. Instead of pooling the treatment effect estimates across both lists, as in the main specification, I can estimate the treatment effect of the 3-person discussion separately for list A and list B. When using each list separately, the treatment effect estimates are similar (\input{../../outputs/stats/list_exp_coeff_list_a} and \input{../../outputs/stats/list_exp_coeff_list_b} respectively), and the difference between the estimates is not significant ($p$=\input{../../outputs/stats/list_exp_p_diff}).




%. The statement about transgender people was randomly added to either list A or list B. Whether list A or list B was read first was also randomized. For each list, enumerators asked how many statements in the list they agreed with. Column (1) of \autoref{tab_attitudes} shows that the discussion does not have a significant effect on the proportion of people who agree with the anti-trans statement.


%\resizebox{\textwidth}{!}{







\subsection{Group relations}
\label{sec_group_relations}


We asked participants questions about their relationships with others in their group, in order to understand how these affected group dynamics. In phase 1 of data collection, we asked each participant two questions about each of the other two people in their group: (i) What is your relationship with [\textit{NAME}]? (ii) How well do you know [\textit{NAME}]? (Options: \textit{Very well}, \textit{Quite well}, \textit{Not very well}, \textit{Very little}). I use question 1 to generate 4 dummy variables, indicating whether the other participant is (i) just a neighbor, (ii) a friend, (iii) a close family member, or (iv) another family member. In phase 2, I expanded this set to include the following additional questions: (iii) 
  How long have you known [\textit{NAME}]? (Options: \textit{Less than 6 months, 6 months to 1 year, 1-5 years, 5+ years}); (iv) In general, how often do you talk to [\textit{NAME}]? (Options: \textit{Never, A few times per year, A few times per month, A few times per week, Most days, Every day});  (v) How often do you ask [\textit{NAME}] for advice? (Same options as iv); (vi) How often do you ask [\textit{NAME}] for recommendations of items to buy? (Same options as iv); (vii) How frequently do you tell secrets to  [\textit{NAME}]? (Same options as iv). I create an index of the perceived strength of the relationship with another group participant. I use a factor analysis to generate loadings for the set of variables that includes the four dummy variables created by question 1, and the questions 2-7. 
%The estimated loadings are in \autoref{tab_group_relations_loadings}.
 I retain all measures that have a loading with an absolute value greater than 0.3. I create an index using a weighted sum of all measures where the weights are proportional to the estimated loadings. In cases where some data is missing (for example, phase 1 participants for whom we do not elicit questions 3-7), only the data that is present is used to calculate the weighted sum.




\subsection{Private grocery pick up choices}
\label{sec_private_delivery}

Participants were told that they had been entered into a lucky draw to win a Rs. 5000 gift voucher, which could be used to buy grocery items. The winner would have to organize getting the items by calling the worker they selected, telling the worker which items they wanted, and meeting the worker at our office to pick up the items.\footnote{In order to ensure that participants anticipated some extended face-to-face contact with the worker, they were also told that they had to have a 15-minute conversation with the worker to give feedback on the process.} In this round, participants saw 4 pairs of options for who they could pick up the items from, and were told that if they won the lottery, we would randomly select one of their choices to organize the pickup with. 2 of the 4 pairs included a transgender worker.

The enumerator giving the interview did not know what responses were given. We did not ask the respondent for their choice verbally, as in the main hiring rounds. Instead, we gave the tablet directly to the respondent, and they clicked their preferred answer. Upon clicking, the tablet would automatically skip to the next question and not reveal again the answer chosen before, making it impossible for the enumerator to know what was selected. We truthfully told respondents that enumerators wouldn't know what was selected, making the answers anonymous.\footnote{Although participants still presumably realized that their data could be used for research purposes, this elicitation nevertheless plausibly reduces the impact of social image concerns on their behavior because the salient social judge, the enumerator, would not know how they had answered.} The anonymity of their answers was well understood by the participants: only \input{../../outputs/stats/prop_an_check1.tex} said that their neighbors would know which options they picked, and only \input{../../outputs/stats/prop_an_check2.tex} said that the surveyor would know.



\subsection{Persuasiveness}
\label{sec_persuasiveness}
In phase 2 of data collection, we asked a set of questions designed to measure how persuasive an individual was likely to be in a group discussion. For each question, the participant was asked to rate out of 10 how each of their neighbors scored on a measure of a personality trait. 5 of the traits measured are associated with extraversion and leadership, while 2 are associated with introversion. The questions were: (i) Out of 10, how confident is [NAME]?; (ii) Out of 10, how quiet is [NAME]? (reverse coded); (iii) Out of 10, how like a leader is [NAME]?; (iv) Out of 10, how shy is [NAME]? (reverse coded); (v) Out of 10, how talkative is [NAME]?; (vi) Out of 10, how admirable is [NAME]?; (vii) Out of 10, how inspiring is [NAME]? These questions were selected from a broader set of questions by selecting the subset of questions that loaded onto the first factor in an exploratory factor analysis of pilot data. I combine the questions into a persuasiveness index by correcting for acquiescence bias, using a factor analysis with one factor to generate factor loadings for each of the 7 measures, and retaining all measures to create an index using a weighted sum of all measures, where the weights are proportional to the estimated loadings.
%
%\begin{enumerate}
%  \item  I correct for acquiescence bias in the same way as described in Appendix \ref{sec_data_sdb}. 
%  \item I use a factor analysis with one factor to generate loadings for each of the 7 measures. The estimated loadings are \input{../../outputs/stats/ls_loadings.tex}. Since all loadings are above 0.3, I retain all the measures and create an index using a weighted sum of all measures, where the weights are proportional to the estimated loadings.
%\end{enumerate}
%
Each participant is rated by both their neighbors. The correlation between the two ratings for each person is positive and significant (Pearson's correlation of \input{../../outputs/stats/ls_corr_basic}, \input{../../outputs/stats/ls_corr_basic_p}), even when controlling for rater fixed effects (Pearson's correlation of \input{../../outputs/stats/ls_corr_fes}, \input{../../outputs/stats/ls_corr_fes_p}). This suggests that the rating detects a meaningful characteristic of the participant.



% (i)(ii) use 


%\subsection{Private hiring choices}
%
%Even when participants made hiring choices in private (without their neighbors listening), their choices in the main round may have been affected by social image concerns. Knowing that their neighbors might see who delivers groceries to their home, they might choose a transgender worker to signal that they are non-discriminatory to their neighbors.
%
%
%To evaluate whether the treatment effects were driven by social image concerns, we therefore added an outcome measure in phase 2 that was designed to be more robustly private than the main outcome.
%
%
%
%This "private hiring choice" was more private in two ways.
%
%First, in order that neighbors would not be able to see which worker was chosen during a delivery, participants were told that they would have to pick up grocery items themselves from our office. 
%
%Specifically, they were told that they had been entered into a lucky draw to win a Rs. 5000 gift voucher which could be used to buy grocery items. The winner would have to organize getting the items by calling the worker they selected, telling the worker which items they wanted, and meeting the worker at our office to pick up the items. In order to ensure that participants anticipated some extended face-to-face contact with the worker, they were also told that they had to have a 15-minute conversation with the worker to give feedback on the process.
%
%In this round, participants saw 4 pairs of options for who they could pick up the items from, and were told that if they won the lottery we would randomly select one of their choices to organize the pickup with. 2 of the 4 pairs included a transgender worker.
%
%Second, we also adapted the elicitation process so that the enumerator giving the interview did not know what responses were given. We did not ask the respondent for their choice verbally, as in the main hiring rounds. Instead, we gave the tablet directly to the respondent, and they clicked their preferred answer. Upon clicking, the tablet would automatically skip to the next question and not reveal again the answer chosen before, making it impossible for the enumerator to know what was selected. We truthfully told respondents that enumerators wouldn't know what was selected, making the answers anonymous.







%\textbf{CALCULATION}






%\begin{table}[p]
%\caption{Salience of the word "transgender" is predictive of the outcome round hiring choices}
%\label{tab_salience_checks}
%\input{../../outputs/tables/salience_checks.tex}
%\begin{tablenotes}
%			\footnotesize
%			\item  \textit{Notes}: * p $<$ 0.1, ** p $<$ 0.05, *** p $<$ 0.01. 
%			\item Only choice-pairs from the outcome round that include a transgender photo are included. The dependent variable in all columns is whether the transgender person was selected. All SDB scores are based on the \citet{crowne1960marlowe} index. In column (1), the SDB score is corrected for acquiescence bias. In column (2), I run a factor analysis on the SDB score, and use the resulting index. In column (3), I use an index calculated using inverse-covariance weights, as seen in  \citet{andersonMultipleInferenceGender2008}. More detail on the construction of the SDB scores is found in Appendix \ref{sec_data_sdb}.
%		\end{tablenotes}
%\end{table}



%\textit{List experiment}. In order to examine whether individuals' private attitudes towards transgender people outside the sphere of hiring were changed by the group discussion, we used a double list experiment. It measured the proportion of people who agreed with the statement ``In general, if I see a transgender person, I walk away.''. Since the list experiment does not allow the enumerator or the researcher to infer \textit{which} statements the participant agrees with, it preserves the anonymity of their responses and so is less likely to be vulnerable to social desirability concerns than standard self-reported attitude questions. Enumerators read out two lists (A and B) that contained 5 non-sensitive statements about a participant's preferences. The statement about transgender people was randomly added to either list A or list B. Whether list A or list B was read first was also randomized. For each list, enumerators asked how many statements in the list they agreed with. Column (1) of \autoref{tab_attitudes} shows that the discussion does not have a significant effect on the proportion of people who agree with the anti-trans statement.



\subsection{LASSO controls}
\label{sec_lasso}



Following \citet{belloniInferenceTreatmentEffects2014}, I use double LASSO to select controls in the main results. The full set of candidate controls from which the LASSO selected was:

\textit{ (1) Female (=1); (2) Speaks English (=1); (3) Reads English (=1); (4) Hindu (=1); (5) Bachelor’s degree (=1); (6) Married (=1); (7) Employed (=1); (8) Landlord (=1); (9) Num. children; (10) Employer (=1); (11) Household size; (12) Monthly household food expenditure per capita (Rs.); (13) Num. family members in group-of-3; (14) Num. neighbours in group-of-3; (15) Num. friends in group-of-3; (16) Taken part in market research survey (=1); (17) Has received free item as promotion (=1); (18) Someone in household ordered taxi with app (=1); (19) Someone in household ordered food with app (=1); (20) Someone in household ordered other items with app (=1); (21) Self-reported WTP for delivery; (22) Respondent would normally be household member who receives delivery (=1); (23) Relative number of items offered by worker; (24) Relative reliability score; (25) Reliability score is shown (=1); (26) Reliability score of the benchmark worker.}

In addition, in interaction specifications where the main treatment was identified by the interaction \textit{Worker is trans} $\times$ \textit{Treatment}, I also include the controls interacted with \textit{Worker is trans} as possible controls. I also calculate the mean of each control variable for the two other people in a participant's group-of-3, and include that mean as a possible control. When there are multiple treatment arms in one specification (e.g., for the phase 2 discussion-arm treatment arms), I include the union of the controls selected by a double LASSO using each of the treatment dummies. I indicate which controls were selected for Tables 1 and 2 by the LASSO selection process in \autoref{tab_lasso}.


%\begin{table}[!htbp]
%\caption{All potential controls used in LASSO control selection process}
%\label{tab_controls}
%\resizebox{0.8\textwidth}{!}{
%\input{../../outputs/tables/possible_controls.tex}
%}
%%\begin{tablenotes}
%%\item Group-level control is the mean value of the variable for the other two people in a participant's group. (2) and (3) indicate the column numbers from Table 1 and Table 2 in the main text. %\footnotesize	\textit{Notes}: * p $<$ 0.1, ** p $<$ 0.05, *** p $<$ 0.01. 
%%\end{tablenotes}
%\end{table}

\begin{table}[!htbp]
\caption{LASSO controls used in Table 1 and Table 2}
\label{tab_lasso}
\resizebox{\textwidth}{!}{
\input{../../outputs/tables/lasso_controls.tex}
}
\begin{tablenotes}
\item Group-level control is the mean value of the variable for the other two people in a participant's group. (2) and (3) indicate the column numbers from Table 1 and Table 2 in the main text. %\footnotesize	\textit{Notes}: * p $<$ 0.1, ** p $<$ 0.05, *** p $<$ 0.01. 
\end{tablenotes}
\end{table}


\clearpage







\section{Transcript data}
\label{sec_appendix_transcript}

\subsection{Sentence-level analysis ($k$-means clustering)}
\label{sec_appendix_transcript_sentence}

I start with a hand-transcribed dataset where the observation is at the (discussion-choice $\times$ participant $\times$ sentence) level. This includes the full sentences spoken by the participant in Tamil and translated into English by research assistants. I use OpenAI's \textit{text-embedding-3-small} model to extract a 1,536-dimensional semantic embedding vector for each English sentence. I then apply $k$-means clustering to the original high-dimensional embeddings to classify each sentence into one of 20 clusters. I chose 20 clusters to provide a granular view of different discussion topics while maintaining interpretable cluster sizes. For each cluster, I identify representative utterances by computing the cosine similarity between each utterance's embedding and its assigned cluster's centroid. Utterances with the highest similarity to their cluster centroid are selected as representative examples, shown in \autoref{transcript_cluster_quotes}, to provide interpretable characterizations of each cluster's semantic content.

\subsection{Broader transcript features and AI hypothesis generation}
\label{sec_appendix_transcript_hypothesis_generation}

To analyze the content of group discussions at the transcript level (where one transcript corresponds to the discussion about a given choice-pair), I used an iterative process based on OpenAI's \textit{GPT-4o-mini} large language model to generate and evaluate hypotheses about discussion patterns. The methodology consisted of three main stages: hypothesis generation, transcript rating, and dimension reduction through factor analysis.


\textbf{Hypothesis generation}. I generated 500 hypotheses by randomly pairing two group discussion transcripts (for discussions involving a transgender worker) and asking \textit{GPT-4o-mini} to identify the main difference between them. For each pair of randomly selected discussions, the following prompt was used:

\begin{lstlisting}[basicstyle=\ttfamily]
BACKGROUND:
Below are transcripts from 2 group discussions in Chennai, India, in which 3 participants discussed whom they would prefer to hire to deliver groceries to their home. Participants had to choose between option A or B. They were shown photos of the two delivery workers, one of whom was male, and one of whom was transgender. Each discussion dealt with multiple rounds, changing the choice of Option A and Option B each time. The grocery items on offer were: Aachi masala spice, tea powder, and ghee. Option A and B may have offered the same set of items, or different sets of items.
They were also in some cases given information about:
 - the languages spoken by the delivery workers (only Tamil, or Tamil and English)
 - the delivery workers' experience
 - how many deliveries they completed in a training task
Participants were asked to discuss which option they preferred, and why, and then make a collective choice between the two options.

TRANSCRIPTS:
   The transcripts are in JSON format.
   Each discussion is represented as a list of utterances, with each utterance containing the following fields:
   - transcript_line_id: the order of the utterance in the discussion
   - who_speaking_label: the label of the participant speaking (1, 2, or 3, or unknown)
   - speech_english: the English translation of the participant's speech
   - pro_a_b: a column manually coded by research assistants indicates whether the participant is arguing to choose A or B, or neither
   
   TRANSCRIPT 1:
      {TRANSCRIPT 1}

    TRANSCRIPT 2:
      {TRANSCRIPT 2}

TASK:
Your task is to identify what has changed from TRANSCRIPT 1 to TRANSCRIPT 2. Focus on the generalizable insight that can be applied in other contexts. Ignore things that are specific to these transcripts. Do not make references to these transcripts that may not be relevant for others. Come up with an insight that captures the sort of change observed moving from TRANSCRIPT 1 to TRANSCRIPT 2.

Come up with an insight as a single sentence in this exact format:
Hypothesis: _____ is the main difference between TRANSCRIPT 2 compared to TRANSCRIPT 1.

Please make sure that the hypothesis is:
i. clear (i.e., precise, not wordy, and easy to understand);
ii. generalizable to novel situations (i.e., they would make sense if applied to other transcripts);
iii. empirically plausible (i.e., this is a dimension on which messages can vary);
iv. unidimensional (i.e., avoid hypotheses that list multiple constructs, so if there are many things changing, pick one);
v. usable (i.e., a human equipped with this insight could evaluate another group discussion in a similar way)
\end{lstlisting}

The temperature parameter for \textit{GPT-4o-mini} was randomized between 0.1 and 0.9 to ensure robustness to model parameters. Each generated hypothesis was then automatically cleaned to remove specific references to ``Group 1'' and ``Group 2'' in order to make it suitable for use as a rating criterion. For example, ``Hypothesis: The level of consensus among participants in decision-making is the main difference between Group 2 compared to Group 1.'' was converted to ``The level of consensus among participants in decision-making.''

\textbf{Transcript rating}. After generating the hypotheses, I used \textit{GPT-4o-mini} to rate how well each of the 1,034 discussion transcripts aligned with each of the 500 hypotheses on a scale of 1--10. Each transcript was rated exactly once against each hypothesis, using the following prompt:

\begin{lstlisting}[basicstyle=\ttfamily]
BACKGROUND:
Below is a transcript from a group discussion in Chennai, India, in which 3 participants discussed who they would prefer to hire to deliver groceries to their home. Participants had to choose between option A or B.
They were shown photos of the two delivery workers, one of whom was male, and one of whom was either male, female, or transgender.
Each discussion dealt with multiple rounds, changing the choice of Option A and Option B each time.
The grocery items on offer were: Aachi masala spice, tea powder, and ghee. Option A and B may have offered the same set of items, or different sets of items.
They were also in some cases given information about:
 - the languages spoken by the delivery workers (Tamil only, Tamil and English)
 - the delivery workers' experience
 - how many deliveries they completed in a training task
Participants were asked to discuss which option they prefer, and why, and then make a collective choice between the two options.

TRANSCRIPT:
{TRANSCRIPT}

HYPOTHESIS:
{HYPOTHESIS}

TASK:
Rate how much this single transcript aligns with the hypothesis on a scale from 1 to 10, where:
  - 10 = maximally in line with the hypothesis
  - 1  = not at all in line with the hypothesis

Output in JSON with two keys:
  - explanation (string)
  - rating (integer, 1-10 or return NA for the rating if the transcript does not contain enough information to make a rating.)
\end{lstlisting}

As with hypothesis generation, the temperature parameter was randomized between 0.1 and 0.9 for each rating to ensure robustness to model parameters.

\textbf{Dimension reduction}. Since many of the 500 hypotheses generated were similar (e.g., ``The level of consensus among participants in decision-making.'' and ``The level of consensus among participants regarding their preference.''), I group the hypotheses into highly correlated factors, based on the correlation of transcript ratings across hypotheses. I first use parallel analysis, which suggests 11 factors in the data, corresponding to 11 hypothesis ``types'', and then use factor analysis to create indexes for these 11 hypothesis types. For each factor, I use the single hypothesis with the highest absolute loading as the label for that hypothesis type.


\subsection{Morality ratings}
\label{sec_appendix_transcript_morality}

\textbf{Morality rating.} I used OpenAI's \textit{GPT-4o-mini} model to rate how much each discussion transcript invoked morality on a 0--1 scale. I repeated the rating 50 times with randomly-varying prompts to ensure robustness, and took the mean rating over all 50 ratings. This methodology, particularly the randomization of prompt elements and multiple analyses per transcript, was designed to minimize the impact of any particular prompt formulation or model parameter setting on the final results. %Each transcript was analyzed 50 times with different configurations to prevent any single prompt formulation from driving the results.

\textbf{Prompt generation.} The prompt was randomized along three dimensions:
\begin{enumerate}
\item Moral topics: Three topics were randomly selected from a pool of eleven possibilities: (i) \textit{fairness}; (ii) \textit{rights}; (iii) \textit{justice}; (iv) \textit{giving people opportunities}; (v) \textit{ethical decision-making}; (vi) \textit{what is `right' or `wrong'}; (vii) \textit{responsibility and accountability}; (viii) \textit{personal autonomy}; (ix) \textit{care and compassion}; (x) \textit{human dignity}; and (xi) \textit{truth and honesty}.
\item Evaluation criteria: The inclusion of specific evaluation criteria was randomized, with two possible additions: (i) whether to explicitly consider both frequency and intensity of moral considerations; and (ii) whether to weight explicit moral references more heavily than implicit ones.
\item Temperature: The GPT-4o-mini temperature parameter was randomly set between 0.1 and 0.9 for each analysis to vary how ``surprising'' the model's outputs were.
\end{enumerate}

\textbf{Prompt template}. The following base prompt was used, with randomized elements inserted as described above:
\begin{lstlisting}[basicstyle=\ttfamily]
BACKGROUND:
Below is a transcript from a group discussion in Chennai, India, in which 3 participants (R1, R2, R3) discussed who they would prefer to hire to deliver groceries to their home. Participants had to choose between option A or B. They were shown photos of the two delivery workers, one of whom was male, and one of whom was either male, female, or transgender. Each discussion dealt with multiple rounds, changing the choice of Option A and Option B each time. The grocery items on offer were: Aachi masala spice, tea powder, and ghee. Option A and B may have offered the same set of items, or different sets of items.

They were also in some cases given information about:
- the languages spoken by the delivery workers (Tamil only, Tamil and English)
- the delivery workers' experience
- how many deliveries they completed in a training task

Participants were asked to discuss which option they prefer, and why, and then make a collective choice between the two options.

TASK: Analyze the transcript by rating how much the discussants invoke morality in their decision-making process on a scale from 0 to 1.

Rating Scale Details:
0: No moral considerations mentioned
1: Morality is the central theme and dominant focus

<randomly included for 50%>
The rating should reflect both:
- The frequency of mentions of moral considerations
- The intensity/emphasis of moral considerations when they appear
</randomly included for 50%>

<randomly included for 50%>
Explicit moral references (e.g., direct mentions of fairness, rights, justice, or 'right thing to do') should be weighted more heavily than implicit ones.
</randomly included for 50%>

Moral considerations include topics like:
- {RANDOM TOPIC 1}
- {RANDOM TOPIC 2}
- {RANDOM TOPIC 3}

Note: Simply mentioning demographic characteristics (like gender) without connecting them to moral concepts does not count as invoking morality.
\end{lstlisting}


%\textbf{Analysis Process}
%Each transcript was analyzed 50 times, with each iteration using a unique combination of randomly selected moral topics, evaluation criteria, and temperature settings. The model produced a rating between 0 and 1 for each analysis, along with an explanation of the rating. The final morality score for each transcript was calculated as the mean of these 50 ratings, with standard deviations computed to assess rating consistency.

	




\section{Heterogeneity in main results}
\label{sec_appendix_het}

%	\textcolor{red}{EXTRA ROBUSTNESS}:
	
%	\textcolor{red}{and to dropping the \input{../../outputs/stats/prop_others_could_hear_answers_rev.tex} of cases where the outcome round was overheard by neighbors (\autoref{tab_protocol_fidelity}). } 


\textbf{Trade-off between items and worker}. The pattern of choices indicates that participants traded off a preference for avoiding transgender workers with the value of the items on offer. Participants were sensitive to the items offered across each option in the pair: each additional item offered by one option in a pair made a participant \input{../../outputs/stats/item_sensitivity.tex} p.p. more likely to select that option (\autoref{tab_main}, column 2). And people were less sensitive to items when shown a transgender person (\autoref{tab_item_sensitivity}, column 1).\footnote{The sensitivity to items did not vary across treatment conditions (\autoref{tab_item_sensitivity}, columns 2--4), which alleviates concerns that the collective nature of the choice made in the group discussion led to changes in preferences for bundles of goods that could confound the treatment effect on discrimination.}

%\autoref{tab_main} (columns 2 and 3) also includes controls for the relative number of items offered by the worker, along with the relative reliability score and a dummy for whether the reliability score was shown.  

%{\color{red}To test for the presence of statistical discrimination against transgender workers, which could be driven by stereotypes of that transgender people are unreliable or untrustworthy, I embed a }

%\textcolor{red}{NOT SURE IF STATISTICAL STUFF SHOULD STAY HERE --- or put it below instead (attitudes/beliefs)}

\textbf{Statistical discrimination}. Belief-based (statistical) discrimination appears to underlie some of participants' unwillingness to select transgender people, driven by negative stereotypes that portray transgender workers as unreliable. Despite transgender workers having the same average reliability score as workers of other genders in the experiment, participants rate transgender workers as less likely to complete a delivery (\autoref{tab_attitudes_beliefs_norms}, panel B, column 3; discussed below). To test whether this leads to discrimination, half of the choice-pairs included information about the reliability of both workers. Revealing the reliability score makes participants \input{../../outputs/stats/statistical_discrim.tex} p.p. more likely to select a transgender worker, and this effect is unique to transgender workers (\autoref{tab_statistical_discrim}, column 1). Anti-transgender discrimination in the control group therefore appears to be partially driven by statistical discrimination. I cannot adjudicate whether it is ``inaccurate'' or ``accurate'' statistical discrimination \citep{bohrenInaccurateStatisticalDiscrimination2023} because participants may be accounting for features that are not captured by the experimental reliability score, such as the risk that a transgender worker is harassed and prevented from carrying out a delivery.

However, the effect of the discussion does not appear to be driven by changes in such statistical discrimination. The discussion does not significantly affect beliefs about the reliability of transgender workers (\autoref{tab_attitudes_beliefs_norms}, panel B, column 3). And I find no evidence that the 3-person discussion reduces the belief-based component of discrimination, although I am not well-powered for this test (\autoref{tab_statistical_discrim}, column 2).\footnote{While the point estimate of the interaction of (\textit{Worker is trans} $\times$ \textit{Reliability score is shown} $\times$ \textit{3-person discussion}) is negative and large enough to negate the effect of (\textit{Worker is trans} $\times$ \textit{Reliability score is shown}), I cannot reject that it is equal to 0 ($p$=\input{../../outputs/stats/p_val_effect_on_statistical_discrim.tex}).}

\textbf{Heterogeneity by participant gender}. A heterogeneity analysis (\autoref{tab_het_demo}) shows that while anti-transgender discrimination is stronger for male participants than female participants (difference: \input{../../outputs/stats/diff_mf.tex} p.p., $p$=\input{../../outputs/stats/diff_mf_p.tex}), the \textit{treatment effects} of the discussion are similar for both males and females ($p$=\input{../../outputs/stats/diff_mf_disc_p.tex}). This is evidence against any explanations for the discussion's effects that are specific to a participant's gender. Relatedly, there is no significant treatment effect on preferences for cis-gender female delivery workers (estimate: \input{../../outputs/stats/eff_female.tex} p.p., $p$=\input{../../outputs/stats/eff_female_p.tex}). Nevertheless, the effect on male participants' preferences for female workers is substantial, even if insignificant (\input{../../outputs/stats/eff_female_male_only.tex} p.p., $p$=\input{../../outputs/stats/eff_female_male_only_p.tex}), leaving open the possibility that the discussion might be reducing discrimination against female as well as transgender workers. 

	
\begin{table}[!htbp]
\caption{Sensitivity to items does not vary across treatment arms and is lower for choices involving transgender workers}
\label{tab_item_sensitivity}
\resizebox{\textwidth}{!}{
\input{../../outputs/tables/item_sensitivity.tex}
}
\begin{tablenotes}
\item \footnotesize \textit{Notes}: \textit{Relative \# of items offered} is the number of items (1, 2 or 3) offered by the alternative worker, less the number of items offered by the male benchmark worker. \textit{Relative value of items offered} is the relative cost in rupees of the items offered by the alternative worker compared to the benchmark worker, divided by 100 (to ease interpretation).
\item * p $<$ 0.1, ** p $<$ 0.05, *** p $<$ 0.01. Standard errors are clustered at the group-of-3 level and are in parentheses.
Standard p-values are in brackets. Unit of observation is the participant $\times$ choice level. Sample includes the \textit{No discussion (private)} arm and the \textit{3-person discussion} arm in both phase 1 and phase 2 of data collection. In all columns the outcome is whether the \textit{alternative worker} (rather than the male \textit{benchmark worker}) was selected. $\textit{Worker is trans} = 1$ when the alternative worker is transgender, and is 0 when the alternative worker is male or female. Columns (1)--(4) show the private choices in the \textit{outcome round}. Columns (5) and (6) show choices in the treatment round (for those in the discussion arm, these were the choices made \textit{during} the discussion).
 The specification used is seen in equation \ref{eqn_main_spec}.  Controls include stratum fixed effects; dummies for the rights videos;  whether the alternative worker was shown on the right; the relative reliability score; a dummy for whether the reliability score was shown; phase fixed effects; and the controls selected by double LASSO (see Section \ref{sec_lasso}). Controls are interacted with \textit{Worker is trans}, so the coefficient on \textit{Worker is trans} is not shown. 
 %\item Column (1): the dependent variable is the number of statements the participant agreed with on each of the lists of statements shown to them. Each participant sees both List A and List B, and the anti-trans statement ("In general, if I see a transgender person, I walk away") is randomly included in either List A or List B.
%\item Column (2): the enumerator describes two scenarios, one in which an employer rejects a transgender individual, and another in which a woman avoids a transgender individual on the street. The dependent variable is whether the participant says the person's actions are "wrong".
%\item Column (3): the dependent variable is whether the participant says a worker is likely or very likely to complete a delivery after being shown a photo of the worker.
\end{tablenotes}
\end{table}


\begin{table}[htbp]
	\centering

	\caption{Effect of discussion on norms, attitudes, and beliefs about reliability}
	\label{tab_attitudes_beliefs_norms}


	
\small \textbf{Panel A:} Norms 
		\centering
\vspace{-1em}

	\normalsize
		\begin{subtable}[t]{0.8\textwidth}
	\resizebox{\textwidth}{!}{
		\input{../../outputs/tables/norms.tex}
	}
		\end{subtable}
		
			\vspace{1em}
			
		\small \textbf{Panel B:} Attitudes and beliefs about reliability
	\centering
\vspace{-1em}

	\normalsize
		\begin{subtable}[t]{0.9\textwidth}
\label{tab_attitudes}
\centering
\resizebox{\textwidth}{!}{
\input{../../outputs/tables/attitudes.tex}
}
		\end{subtable}
	\footnotesize
\begin{tablenotes}
\scriptsize	\item \textit{Notes}: * p $<$ 0.1, ** p $<$ 0.05, *** p $<$ 0.01. Standard errors are clustered at the group-of-3 level and are in parentheses. Randomization inference p-values are in brackets. Sample includes only the \textit{No discussion (private)} and \textit{3-person discussion} arms, in both phases. Controls include stratum fixed effects; dummies for the rights-video treatments; phase fixed effects; and the controls selected by double LASSO (see Section \ref{sec_lasso}). For Panel B, column (2), I include controls for the difference in items offered, the relative reliability score, and whether the reliability score is shown. As pre-specified, columns (1) and (2) are adjusted for multiple hypothesis testing using the q-value that controls for the false discovery rate \citep{andersonMultipleInferenceGender2008}.
\item \textit{Panel A, Column (1)}: Outcome is the incentivized prediction of the proportion of other people (out of 20) in the study who will pick a transgender worker. Only the choice involving the transgender worker is included.
\item \textit{Panel A, Column (2)}: The unit of observation is the participant $\times$ prediction. Outcome is whether the participant predicted that another person in their group selected a transgender worker in the private outcome round.  Only predictions involving a transgender worker are included.

\item \textit{Panel B, Column (1)}: Outcome is the number of statements the participant agreed with on a list of statements. Each participant sees both List A and List B. The anti-trans statement (``In general, if I see a transgender person, I walk away'') is randomly included in either List A or List B. \textit{Question FEs} is a fixed effect for List B.
\item \textit{Panel B, Column (2)}: Enumerator describes two discriminatory scenarios. Outcome is whether the participant says the person's actions are wrong. \textit{Question FEs} is a fixed effect for the second scenario.
\item \textit{Panel B, Column (3)}: Outcome is whether the participant says a worker is likely or very likely to complete a delivery after being shown a photo. Participants rate two workers, one of whom is transgender. Order is randomized. \textit{Question FEs} controls for the order of the choice.
%In columns (1) and (2), the outcome is whether the \textit{alternative worker} (rather than the male \textit{benchmark worker}) in the private choices in the \textit{outcome round}. In column (3), it is whether the transgender worker was selected. $\textit{Worker is trans} = 1$ when the alternative worker is transgender, and is 0 when the alternative worker is male or female. 
%The mean of the dependent variable when the worker is trans and in the \textit{No discussion (private)} arm indicates that the transgender worker was selected (rather than the male benchmark worker) \input{../../outputs/stats/p_choose_trans_control.tex} of the time. The mean when the worker is male or female in the \textit{No discussion (private)} arm is above 50\% because participants on average prefer female alternative workers to the male benchmark workers.
%The specification used is seen in equation \ref{eqn_main_spec}.  


%In column (2), controls are interacted with \textit{Worker is trans}, so the coefficient on \textit{Worker is trans} is not shown. Columns (2) and (3) also include controls for the relative \# items offered by the alternative worker, the relative reliability score of the worker, and a dummy for whether the reliability score was shown. randomization inference \textit{p}-values at the base of the table test for differences between treatment effects across treatment arms, i.e., for differences in the interacted terms in columns (1) and (2), and differences in the uninteracted terms in column (3).
\end{tablenotes}
\end{table}


\begin{table}[!htbp]
\caption{Evidence of statistical discrimination against transgender workers}
\label{tab_statistical_discrim}
\resizebox{\textwidth}{!}{
\input{../../outputs/tables/statistical_discrim.tex}
}
\begin{tablenotes}
\item	\footnotesize \textit{Notes}: * p $<$ 0.1, ** p $<$ 0.05, *** p $<$ 0.01. Standard errors are clustered at the group-of-3 level and are in parentheses. Standard p-values are in brackets. Unit of observation is the participant $\times$ choice level. Sample includes the \textit{3-person discussion} arm and the \textit{No discussion (private)} arm, in both phase 1 and 2. The outcome is whether the \textit{alternative worker} (rather than the male \textit{benchmark worker}) was selected in the private choices in the \textit{outcome round}. $\textit{Worker is trans} = 1$ when the alternative worker is transgender, and is 0 when the alternative worker is male or female.   Controls include stratum fixed effects; dummies for the rights videos; whether the individual was randomized into being offered 3 deliveries or 1 delivery, or was not part of this randomization; whether the alternative worker was shown on the right; phase fixed effects; the relative \# items offered; and the controls selected by double LASSO (see Section \ref{sec_lasso}). Relative reliability score is the reliability score (out of 10) of the alternative worker minus the benchmark worker. \textit{Reliability score is shown} is 1 when the reliability score is shown. Relative reliability score is coded as 0 when it is not shown.
\end{tablenotes}
\end{table}






\begin{table}[!htbp]
\caption{Heterogeneity by demographic characteristics}
\label{tab_het_demo}
\centering
\resizebox{0.8\textwidth}{!}{
\input{../../outputs/tables/basic_het.tex}
}
\begin{tablenotes}
\item \footnotesize	\textit{Notes}: * p $<$ 0.1, ** p $<$ 0.05, *** p $<$ 0.01.  Standard errors are clustered at the group-of-3 level and are in parentheses. Standard p-values are in brackets. Unit of observation is the participant $\times$ choice level. Sample includes the \textit{3-person discussion} arm and the \textit{No discussion (private)} arm, in both phase 1 and 2. The columns together show the results from one regression. Column 1 shows the coefficients without interaction with \textit{3-person discussion}. Column 2 shows the coefficients when interacted with \textit{3-person discussion}. The outcome is whether the transgender worker was selected in the private outcome round, restricting analysis to only choices that include a transgender worker. Additional controls include stratum fixed effects; dummies for the rights videos; whether the individual was randomized into being offered 3 deliveries or 1 delivery, or was not part of this randomization; whether the alternative worker was shown on the right; phase fixed effects; relative \# items offered; relative reliability score; whether the reliability score was shown.

%\item \textit{High persuasiveness score for discussants} is defined based on the index of persuasiveness seen in Appendix \ref{sec_persuasiveness}. The variable is 1 if the average score based on the listener's responses about the two discussants is higher than the average score based on the two discussants' responses about the listener. Intuitively, this describes whether the random selection of discussants selects people who are relatively more persuasive.
%\item \textit{Close relations with others in group} is defined based on the index of the closeness of relationships between participants in a group, seen in Appendix \ref{sec_group_relations}. For each participant, I use their responses to calculate relationship scores for the other two in their group. I take the average of these two scores. The variable used is 1 if this average is greater than the sample median, and 0 otherwise.

% when the participant rated others in their group to have average score that individual based on the index defined in Appendix \ref{sec_group_relations}.
%\item Column (1): the dependent variable is the number of statements the participant agreed with on each of the lists of statements shown to them. Each participant sees both List A and List B, and the anti-trans statement ("In general, if I see a transgender person, I walk away") is randomly included in either List A or List B.
%\item Column (2): the enumerator describes two scenarios, one in which an employer rejects a transgender individual, and another in which a woman avoids a transgender individual on the street. The dependent variable is whether the participant says the person's actions are "wrong".
%\item Column (3): the dependent variable is whether the participant says a worker is likely or very likely to complete a delivery after being shown a photo of the worker.
\end{tablenotes}\end{table}



\section{Additional results on rights videos}
\label{sec_appendix_videos}

\textbf{Manipulation check}. 
As a manipulation check, I show that participants' beliefs about the legal rights of transgender people (as measured by a summary index) are significantly affected by the legal rights video, but not by the rights messaging video (Appendix \autoref{tab_law_checks}).

\textbf{Interaction effects}. I do not find interaction effects between the \textit{legal rights} video and group discussions (\autoref{tab_video_interactions}); the reductions in discrimination caused by both combine approximately linearly ($p \in [\input{../../outputs/stats/p_vals_vid_discuss_interact.tex}]$). By contrast, there is weak evidence of a negative interaction effect between the \textit{rights messaging} video and group discussions, such that the rights messaging video has no detectable effect on discrimination in the group-discussion arms ($p\in[\input{../../outputs/stats/p_vals_video_placebo_discussion.tex}]$). 
%{\color{red}Similarly, measures of group norms show that while the rights messaging video has an effect on group predictions in the no-discussion arms, it does not do so in the discussion arms (\autoref{tab_video_interactions_norms}). (THIS ISN'T REALY SUPPORTED BY THE DATA, not enough power - possibly remove this table) }
This may be because the content of the \textit{rights messaging} video is very similar to the persuasive discourse in the discussion, therefore acting as a close substitute, whereas the \textit{legal rights} video provides additional informational content.


\textbf{Mechanism for legal rights videos}. The effect of informing participants about the legal rights of transgender people appears to be partially mediated by changes in perceived norms and perceived reliability. The videos have a significant effect on perceived norms of discrimination (\autoref{tab_videos_mechs}, columns 1--2). Participants predict that others will select transgender workers more, both in the wider community (2--3 p.p.) and in their group of 3 (4--6 p.p.). The videos also lead to small increases in whether a transgender worker is deemed likely to complete the delivery (\input{../../outputs/stats/effect_belief_vid.tex} p.p., \input{../../outputs/stats/effect_belief_vid_perc.tex}, \autoref{tab_videos_mechs}, column 5). By contrast, neither video has a detectable effect on attitudes, as measured by the list experiment or the questions on disapproval of discrimination (\autoref{tab_videos_mechs}, columns 3--4). The rights videos' effects could also be driven by fear of anti-discrimination laws being enforced against participants.




%In line with the \textit{expressive law hypothesis} \citep{benabouLawsNorms2011, sunsteinExpressiveFunctionLaw1996}, the videos have a significant effect on perceived norms of discrimination (\autoref{tab_videos_mechs}). Participants predict that others will select transgender workers more, both in the wider community (2--3 p.p.), and in their group of 3 (4--6 p.p.). The videos also lead to small increases in whether a transgender worker is deemed likely to complete the delivery (\input{../../outputs/stats/effect_belief_vid.tex} p.p., \input{../../outputs/stats/effect_belief_vid_perc.tex}, \autoref{tab_videos_attitudes}, column 3). By contrast, neither video has a detectable effect on attitudes, as measured by the list experiment or the questions on disapproval of discrimination (\autoref{tab_videos_attitudes}, columns 1-2). 


\begin{landscape}
\begin{table}[!htbp]
\caption{Legal rights video affects beliefs about the legal status of transgender people}
\label{tab_law_checks}
\resizebox{1.55\textwidth}{!}{
\input{../../outputs/tables/law_checks.tex}
}
\begin{tablenotes}
\item \footnotesize	\textit{Notes}: * p $<$ 0.1, ** p $<$ 0.05, *** p $<$ 0.01. Standard errors are clustered at the group-of-3 level and are in parentheses. Randomization inference p-values are in brackets. Unit of observation is the participant. Sample includes all participants in all discussion-arm treatments, in both phase 1 and 2 of data collection. Controls include stratum fixed effects; dummies for the discussion-arm treatments; whether the individual was randomized into being offered 3 deliveries or 1 delivery, or was not part of this randomization; phase fixed effects; and the controls selected by double LASSO (see Section \ref{sec_lasso}). \textit{Say trans have legal status} is an indicator for whether the participant responds yes to ``Do transgender people have legal status?''. \textit{Correctly name at least one legal right} indicates whether the participant was able to correctly name at least one legal right that transgender people hold in India in response to the question ``What legal status do transgender people have?''. \textit{Number of legal rights correctly named} is the number of correct legal rights named in response to this same question (coded as 0 if they say that transgender people do not have legal status). \textit{Not employing is illegal}: after listening to a discriminatory vignette (``Two people approach someone for a job: a man and a transgender. The employer rejects the transgender because they are transgender.''), the participant said that the employer is breaking the law. \textit{Avoiding on street is illegal}: after listening to a second discriminatory vignette (``A woman avoids a transgender person on the street, because they are transgender.''), the participant said that the woman is breaking the law. 
\textit{Summary index (Z)} is created by (i) normalizing each of the outcome variables in columns 1, 3, 4, and 5 by subtracting the control-video mean and dividing by the control-video standard deviation; (ii) combining these normalized variables into an index with weights based on the inverse-covariance matrix \citep{andersonMultipleInferenceGender2008}.
%\item Column (1): the dependent variable is the number of statements the participant agreed with on each of the lists of statements shown to them. Each participant sees both List A and List B, and the anti-trans statement ("In general, if I see a transgender person, I walk away") is randomly included in either List A or List B.
%\item Column (2): the enumerator describes two scenarios, one in which an employer rejects a transgender individual, and another in which a woman avoids a transgender individual on the street. The dependent variable is whether the participant says the person's actions are "wrong".
%\item Column (3): the dependent variable is whether the participant says a worker is likely or very likely to complete a delivery after being shown a photo of the worker.
\end{tablenotes}
\end{table}
\end{landscape}

\begin{table}[!htbp]
\caption{Interactions between trans rights videos and discussions}
\label{tab_video_interactions}
\resizebox{\textwidth}{!}{
\input{../../outputs/tables/video_interactions.tex}
}
\begin{tablenotes}
\item	\footnotesize \textit{Notes}: * p $<$ 0.1, ** p $<$ 0.05, *** p $<$ 0.01. Standard errors are clustered at the group-of-3 level and are in parentheses. Standard p-values are in brackets. Unit of observation is the participant $\times$ choice level. Outcome is whether a participant chose the transgender worker in the private outcome round (restricting analysis to only choices with transgender workers). Sample in columns (1) and (2) includes only the \textit{3-person discussion} arm and the \textit{No discussion (private)} arm, in both phases. \textit{$p$-val: (Rights messaging video $\vert$ 3-person discussion)} denotes the $p$-value on the test that the effect of the rights messaging video is 0 for participants in the 3-person discussion arm. Other p-values are defined analogously. Controls include stratum fixed effects; phase fixed effects (columns 1 and 2 only); whether the individual was randomized into being offered 3 deliveries or 1 delivery, or was not part of this randomization; whether the alternative worker was shown on the right;  and the controls selected by double LASSO (see Section \ref{sec_lasso}).
\end{tablenotes}
\end{table}


\begin{table}[!htbp]
\caption{Effect of rights video on mechanism outcomes}
\label{tab_videos_mechs}
\resizebox{\textwidth}{!}{
\input{../../outputs/tables/video_mechs_combined.tex}
}
\begin{tablenotes}
\item \footnotesize	\textit{Notes}: * p $<$ 0.1, ** p $<$ 0.05, *** p $<$ 0.01. Standard errors are clustered at the group-of-3 level and are in parentheses. Standard p-values are in brackets. Sample includes all participants in both phases.
\item Column (1): The unit of observation is the participant. The dependent variable is the incentivized prediction of the proportion of other people (how many out of 20) in the study who pick a transgender person to receive a delivery when shown a specific pair of workers. Each participant makes 3 incentivized predictions, one of which includes a transgender worker. Only the choice involving the transgender worker is included for analysis. Column (2): The unit of observation is the participant $\times$ prediction. The dependent variable is whether the participant predicted that another person in their group selected a transgender worker in the private outcome round. The prediction is incentivized. Each participant made 2 predictions (one involving a transgender worker) for each of their 2 group members. The two predictions involving a transgender worker are included for analysis. Controls include stratum fixed effects; dummies for the discussion-arm treatments; phase fixed effects; and the controls selected by double LASSO (see Section \ref{sec_lasso}). 
%\item Column (1): the dependent variable is the number of statements the participant agreed with on each of the lists of statements shown to them. Each participant sees both List A and List B, and the anti-trans statement ("In general, if I see a transgender person, I walk away") is randomly included in either List A or List B.
%\item Column (2): the enumerator describes two scenarios, one in which an employer rejects a transgender individual, and another in which a woman avoids a transgender individual on the street. The dependent variable is whether the participant says the person's actions are "wrong".
%\item Column (3): the dependent variable is whether the participant says a worker is likely or very likely to complete a delivery after being shown a photo of the worker.
\end{tablenotes}
\end{table}



\section{Further evidence on persuasion}
\label{sec_appendix_persuasion}

%\textcolor{red}{The fact norm effect that it doesn't attenuate to 0 in \autoref{tab_mediation} does imply that even holding group norms constant, there is still some effect on private behaviour (indicating some private change in normative beliefs independent of perceived norms).}

\textbf{Amount of conversation.} If participants updated their personal normative beliefs or their beliefs about the prescriptive norm due to the discussion, we would expect \textit{more conversation} to lead to less discrimination (either because there are more signals about the norm, or because participants are exposed to more persuasive messages). The correlational evidence supports this: a 1 standard deviation increase in how much conversation there was about transgender workers (as rated by the enumerator) was correlated with listeners being \input{../../outputs/stats/coeff_amount_discussed_listener_trans.tex} p.p. more likely to select a transgender worker ($p$=\input{../../outputs/stats/amount_discussed_listener_trans.tex}).


%\footnote{This is not due to a correlation between overall conversation levels and selection of transgender workers: having more conversation about \textit{non}-transgender workers was correlated \textit{negatively} with selecting transgender workers.}

\textbf{Disagreement.} In line with the claim that the arguments made about transgender workers are more persuasive than those made about non-transgender workers, groups are less likely to \textit{disagree} when discussing transgender workers. Research assistants coded whether, during a discussion, some participants disagreed by arguing for opposing options. For choices involving a transgender worker, disagreement was coded in only 16\% of cases (compared to 21\% for non-transgender pairs, $p$ of difference=\input{../../outputs/stats/pval_disagreement.tex}), possibly indicating more persuasive arguments.


%\textcolor{red}{One alternative story that is in line with persuasion would be that participants are predominantly being persuaded by pro-trans participants to \textit{act in accordance with their moral beliefs} by not discriminating, rather than being persuaded to change their broader attitudes towards transgender workers. For example, they might be persuaded that not selecting a transgender worker is a form of discrimination that violates their belief in equality. This mechanism is in line with evidence from psychology that suggests that persuasion is particularly effective when it increases the coherence of our beliefs (see e.g., \citealp{mercier2020not}, Ch. 4). This is \textcolor{red}{kind of similar to effects through beliefs about prescriptive norms...}.}


%\section{Additional background/context}
%
%
%Discrimination can also take other forms, such as being cut off from family support, housing discrimination, harassment, violence, and difficulty in accessing medical treatment and education \citep{ipsosGlobalAttitudesTransgender2018, u.s.statedept.2021CountryReports2021, malLetUsLive2015, ganjuStigmaViolenceHIV2017, shaikhEmpoweringCommunitiesStrengthening2016, babaTransgenderHealthHealthcare2018, chakrapaniHijrasSexWork2004, chakrapaniBarriersFreeAntiretroviral2011, sangamaTransgenderSurveyKerala2015}.
%
%Tamil Nadu is an appropriate context for studying anti-transgender discrimination, because despite seeing policy changes that favor transgender people (e.g., the state government constituting a Transgender Welfare Board),
%%\footnote{For example, the state government constituted a Transgender Welfare Board in 2008, and the Madras High Court made a judgement in 2021 that recommended policies for reducing anti-LGBTQ+ discrimination} 
%qualitative studies indicate that discrimination is persistent and widespread \citep{chakrapaniBarriersFreeAntiretroviral2011, unesconewdelhiExperiencesBullyingSchools2018, kumarExploringDiscriminationStigma2022, subramanianPsychosocialImpactQuality2009}.

\clearpage

\section{Alternative mechanisms}
\label{sec_alt_mechanisms}

In this section, I document evidence against a number of other mechanisms that might underlie the treatment effect of the discussion and the rights videos. 

%\textcolor{red}{ADD "BELIEFS ABOUT RELIABILITY" here??}

\textbf{Other photo characteristics}. If the transgender worker photos were observably different from non-transgender photos, this could have driven some of the treatment effects. For example, if transgender workers looked \textit{poorer}, the discussion's effect might be driven by changing preferences for hiring low-income workers. To evaluate this concern, I used a separate sample of 500 online respondents from Tamil Nadu to rate the characteristics of a set of 30 photos used in the study. Participants rated photos in terms of perceived income, religion, age, caste, education level, and how neatly workers were dressed. They also rated how comfortable they would be talking to the worker, how unsafe they would feel having the worker in their home, how worried they would be if the worker spoke to their family, and how unhappy their spouse would be if the participant spoke to the worker. There were substantial differences in the perceived characteristics of transgender workers compared to non-transgender workers --- 
e.g., 28\% of transgender photos were rated as being very likely to come from a Scheduled Caste, compared to only 19--20\% for male and female photos. Participants also perceived transgender workers as relatively poorer, more likely to be Hindu, less educated, and less neatly dressed compared to other workers. However, after controlling for the perceived characteristics of the worker photo, the results do not change qualitatively: the discussion still reduces discrimination by an estimated 20 p.p. (\autoref{tab_photo_rating_survey}). This suggests that the treatment effects are driven by changes in preferences for selecting transgender workers \textit{per se}, rather than by changes in preferences for any correlated characteristics such as caste or age.

%\textcolor{red}{\textbf{Beliefs about reliability.} To measure whether there were changes in the perceived reliability of transgender workers as a result of the discussion, participants were asked to say how likely they think a certain worker was to complete a delivery if they were hired.
%Beliefs about the reliability of transgender workers were not significantly affected by the discussion (\autoref{tab_attitudes_beliefs_norms}, panel A, column 3). While participants were \input{../../outputs/stats/effect_belief_reliability.tex} p.p. less likely to say that a worker is ``likely" or ``very likely" to complete the delivery when the worker is transgender ($p$ \input{../../outputs/stats/p_val_belief_reliability.tex}), this does not vary significantly across treatment conditions ($p$=\input{../../outputs/stats/p_val_belief_reliability_discussion.tex}).} %The null effect on beliefs matches the insignificant differences in the level of statistical discrimination across treatment conditions I described above (\autoref{tab_statistical_discrim}).

\textbf{Social image concerns that continue in the outcome round.} Even when participants made hiring choices in private in the outcome round (without their neighbors listening), their choices may have been affected by social image concerns. Knowing that their neighbors might see who delivered groceries to their home, they might choose a transgender worker to signal that they were non-discriminatory to their neighbors. To assess whether the treatment effects remained when shutting down this channel, I use a series of supplementary hiring choices. These \textit{private grocery pick-up} choices (detailed in Appendix \ref{sec_private_delivery}) were designed to be more robustly private than the main outcome in two ways. First, participants had to pick up grocery items from the team office instead of receiving the delivery at home, so that neighbors would not know which worker was chosen. Second, I adjusted the elicitation process so that the participants' responses were hidden from the surveyor giving the interview.%\footnote{}




The 3-person discussion still reduced discrimination for the private grocery pick-up choices (\autoref{tab_anon}).\footnote{Discrimination in the \textit{No discussion (private)} arm was stronger for these private outcomes than for the main hiring elicitation (\input{../../outputs/stats/anon_trans_penalty.tex} p.p., $p$$<$$0.001$). The more extreme discrimination may come from a perception of increased intensity of social contact between the participant and the chosen worker: the participant was told they would have to speak on the phone to the worker and then organize a time to come to the office \textit{alone} and speak to them for 15 minutes.} 
%
The discussion treatment effect on this outcome is large, although slightly smaller in magnitude than that on the main hiring outcome (\input{../../outputs/stats/anon_effect.tex} p.p., $p$=\input{../../outputs/stats/anon_effect_p.tex}). The \textit{legal rights} video also reduces discrimination significantly, with a magnitude similar to that for the main outcome (9 p.p., $p$$<$0.001). %The evidence on the \textit{rights messaging video} is less clear, with some specifications showing a significant effect and others showing a null effect. 
Taken together, the results suggest that social image concerns \textit{after} the discussion has ended are not sufficient to explain the measured treatment effects, although I cannot rule out that such concerns play some role.%\footnote{\textcolor{red}{The results also imply that \textit{altruistic} or \textit{paternalistic discrimination}, where participants decide not to select transgender workers to avoid putting the workers in an unsafe position, also do not drive the overall effects.}}
%Taken together, the results on this robustly private outcome show that the law and group discussions lead to a drop in discrimination. This implies that appearing pro-transgender to others \textit{after} the discussion has ended is not the only driver of the post-discussion reduction in discrimination. Still, the smaller coefficient on this outcome may be due to such social image concerns partially driving post-discussion choices.

%people are not changing their behavior \textit{after} the discussion has ended just because they want to \textit{appear} to be pro-transgender to others, i.e., because of social image concerns. Nevertheless, the smaller coefficient suggests that social image 











%"This robustly private outcome also shows a reduction in discrimination, suggesting it's not social image concerns, not a need to seem pro-transgender, that drive the decrease in the private outcome round."

%The fact that the reduction in discrimination also occurs when using this robustly private outcome suggests that the reduction in discrimination seen in the private outcome round is not implicitly driven by social image concerns, i.e., the desire to appear pro-transgender.









	
%	\subsection{Phase 2 mechanism treatments}
	
%	{\color{red}Should I present the treatment round estimates first? i.e. flow = discussion is super positive? Why is this? Then look at mechanism treatments to try and answer. (But this is less in keeping with my model because treatment round is less the focus - it's more on mechanisms??}



% {\color{red}\textbf{Reference asymmetric persuasion here??, e.g.,  \autoref{fig_het_by_positive_discussions}}}
 
 

%\textbf{LISTENER} \autoref{tab_effect_of_listening}


%Participants do therefore seem to be reacting to the treatment and changing their behavior to match their group-members, but this does not result in a mean shift in how much they discriminate. In terms of the framework from Section \ref{sec_framework}, this implies that $\gamma_1$ is greater than 0, since I show that within-group correlation is increasing in $\gamma_1$ (Appendix \ref{sec_model_convergence}). Intuitively, when participants care about matching their group-members' choices, participants will tend to converge in their choices.


%If social image concerns were driving people's propensity to choose transgender people in discussions (in terms of the framework in Section \ref{sec_framework}, if $E(\gamma_0 - \gamma_1 \hat{D}_{-i,j})$ had been non-zero), then we would expect to see a mean effect of the social image treatment on participants' treatment round choices (and if the discussions involved persuasion, on the outcome round choices too). Thus, the null result on the \textit{No discussion (public)} means that it is unlikely that social image concerns alone are driving participants to be more pro-trans in the discussions.\footnote{Appendix \autoref{tab_icc} shows that despite not shifting the level of discrimination, the \textit{No discussion (public)} treatment does have an effect on behavior. Within a group, there is a greater correlation of whether each member selects a transgender in the treatment round in the \textit{public} arm as compared to the \textit{private} arm, as measured by the intra-cluster correlation coefficient ($p = $\input{../../outputs/stats/ri_p_icc_private_public.tex}). All participants in a group are always shown the same delivery options in the \textit{treatment round}, regardless of their treatment status, making this comparison valid. Participants do therefore seem to be reacting to the treatment and changing their behavior to match their group-members, but this does not result in a mean shift in how much they discriminate. In terms of the framework from Section \ref{sec_framework}, this implies that $\gamma_1$ is greater than 0, since I show that within-group correlation is increasing in $\gamma_1$ (Appendix \ref{sec_model_convergence}). Intuitively, when participants care about matching their group-members' choices, participants will tend to converge in their choices.}






%nor did it feed into a mean change in discrimination in the subsequent private round. 
%
%
%
%\textbf{ASYMMETRIC PERSUASION TEST (because no treatment round effect, and still pro-trans persuasion)}
%
%"Effect of announce" table - but not enough power...
%
%\textbf{Implications for social image}
%
%\textbf{Reduced form effect - mean effect $E(\gamma ...)$ is 0}
%
%\textbf{But disguises that $\gamma_0 > 0$}
%
%
%
%
%If social image concerns were driving people's propensity to choose transgender people in discussions (in terms of the framework in Section \ref{sec_framework}, if $E(\gamma_0 - \gamma_1 \hat{D}_{-i,j})$ had been non-zero), then we would expect to see a mean effect of the social image treatment on participants' treatment round choices (and if the discussions involved persuasion, on the outcome round choices too). Thus, the null result on the \textit{No discussion (public)} means that it is unlikely that social image concerns alone are driving participants to be more pro-trans in the discussions.\footnote{Appendix \autoref{tab_icc} shows that despite not shifting the level of discrimination, the \textit{No discussion (public)} treatment does have an effect on behavior. Within a group, there is a greater correlation of whether each member selects a transgender in the treatment round in the \textit{public} arm as compared to the \textit{private} arm, as measured by the intra-cluster correlation coefficient ($p = $\input{../../outputs/stats/ri_p_icc_private_public.tex}). All participants in a group are always shown the same delivery options in the \textit{treatment round}, regardless of their treatment status, making this comparison valid. Participants do therefore seem to be reacting to the treatment and changing their behavior to match their group-members, but this does not result in a mean shift in how much they discriminate. In terms of the framework from Section \ref{sec_framework}, this implies that $\gamma_1$ is greater than 0, since I show that within-group correlation is increasing in $\gamma_1$ (Appendix \ref{sec_model_convergence}). Intuitively, when participants care about matching their group-members' choices, participants will tend to converge in their choices.}
%
%
%
%
%
%We can also compare the \textit{observer} to the \textit{non-observer} to measure the effect of being exposed to the choices of others in ones group. Observers were told what the others in their group chose in the treatment round before making their outcome-round choices. Non-observers were only told after the outcome round. This variation was designed to test whether being exposed purely to the choices of others in your group is enough to reduce discrimination, or whether the justifications, narratives, and explanations that are part of the discussion are crucial for explaining its persuasive power. \autoref{tab_main_phase_2} shows that there is no significant difference on average between the outcome-round choices of observers and non-observers ($p = $\input{../../outputs/stats/p_val_observer_non_observer.tex}). 
%
%The null effect could be driven either by (i) a lack of change in behavior in the treatment round, so that participants observe others acting in a way that does not persuade them or update their norms to be more pro-trans, or (ii) that simply observing others' choices is not persuasive, and the elements of the discussion (engaging in persuasion, generating narratives and justifications for choices, etc.) might be key to its effects.
%
%In \autoref{tab_effect_of_announce} shows evidence that the announcements of others' choices do not appear to be especially persuasive. To measure the persuasive effect of observing others choose a transgender individual, I calculate the proportion of times others in a participant's group chose a transgender in the treatment round, denoted $\pi_{ij} = P(\text{others in group selected trans in treatment round})$. Since these choices were affected by social image concerns, this quantity is endogenous to $i$'s behavior, and so we cannot just use this to measure the treatment effect of observing others' choices. However, since the allocation of individuals to the \textit{observer} and \textit{non-observer} roles was random, this means that the \textit{difference} between observers and non-observers in the effect of $\pi_{ij}$ causally identifies the effect of being told that others in your group chose transgender people. As shown in \autoref{tab_effect_of_announce} (Column 4), the estimated interaction effect is positive (\input{../../outputs/stats/effect_of_announce.tex}), but it is not statistically different from 0 ($p = $\input{../../outputs/stats/effect_of_announce_p_val.tex}). While I do not have power to reject small effects, this suggests that simply being told that others have selected a transgender worker is insufficient for generating large reductions in discrimination.\footnote{The announcements' relatively weak effects do not appear to stem from memory or comprehension issues, since participants in the \textit{No discussion (public)} arm remember the announcements. When asked later in the survey, they correctly recall whether others selected a transgender worker \input{../../outputs/stats/mem_check_choosing_only.tex} of the time (Appendix \autoref{fig_mem_check}). Nevertheless, participants were slightly more able to recall choices that were made in a discussion than choices that were announced to them. 
%This suggests that the discussion -- and the narratives, justifications, and reasoning raised in the process -- may be more salient than bare choices. This is in line with recent evidence suggesting that stories are more memorable than statistics \citep{graeberStoriesStatisticsMemory2022}.}

% At the same time, participants are able to remember choices they (and others) made in a discussion more readily than choices that were announced to them. This suggests that the choices made in the discussion were more salient. It may be the debate itself, and the ensuing narratives, justifications and stories discussed about transgender people, that is particularly memorable, and perhaps particularly persuasive (in line with evidence on narratives being more memorable than statistics {\color{red}\textbf{Roth et al}}).} The narratives, justifications, and explanations for choices thus seem to be important for generating the large effects on the discussions.





%
%	
%	- LISTENER - same effect as taking part in the discussion... implies that even just being exposed to the arguments etc. is enough. Don't need to necessarily be an active participant to be persuaded...
	
	
%	- 2person and 3-person discussion not significantly different
 

%- No discussion (public) - no effect - 
%- SOCIAL IMAGE alone doesn't have an effect, implying it's some magic sauce in the discussion...
%- Announcement alone doesn't have an effect - suggesting that persuasive effect of \textit{being told} that others have chosen trans is not doing much in equilibrium
%- Note - ICC changes - gets higher in social image - suggesting something is going on... no mean effect
%
%(No R1 effect)


	
	
	

	
	
	
	
	
%	\section{Mechanism results}
%	\label{sec_secondary_outcomes}
	
%	\textit{Discrimination}:
%	\begin{enumerate}
%		\item \textbf{Follow-up discrimination}. In the follow-up survey, respondents will be asked to choose delivery options for 6 choice-pairs. Unlike in the main session, this choice will be explicitly hypothetical. I will use an analogous specification (based on equation \ref{eqn_main_spec}) to analyze whether changes in discrimination persist after 1 week. For this analysis, I will drop the small subset of individuals who actually receive a delivery from a transgender individual.	
%	\end{enumerate}

%\newpage

%\subsection{Discussion mechanisms}



%To understand how the discussion encourages people to discriminate less, I unpack here what goes on in the discussion. First, \autoref{fig_r1} shows that there is an especially strong effect of the discussion on reducing discrimination \textit{during} the treatment round of hiring, that is, when comparing the choices made in the discussion itself to the choices made by the control group in the first 4 pairs. 


%{\color{red}Talk more about asymmetric persuasion}




%0. Discrimination is less severe against trans when they are dominated \textbf{issue - why only in outcome round?}

%1. Negative discrimination is reduced, and positive discrimination is increased in outcome round (treatment effect roughly similar across all categories)

%2. Evidence of particularly strong positive discrimination in discussion (positive interaction effect; and talk about levels above 50\%)


%Footnote:




%- As expected, workers are more likely to be selected when they dominate, and less likely to be selected when they are dominated.

%- For both control and discussion groups in outcome round
%- In treatment round - especially for discussion group




%If we selecting trans people in the treatment round in the dominated case, that's a very strong signal and is likely to reduce discrimination later on (but it's not the only driver, because worker is trans x discussion (pooled) coefficient is still main driver). 

%Also treatment effect on outcome round is not conditional on worker dominating... suggests that both negative discrimination is reduced, and positive discrimination increased...



%- {\color{red}Suggests that both positive and negative discrimination are improved by discussion. There is actually positive discrimination on net in the discussion for the dominated cases... (link to pro-social reasons in discussion)}


\textbf{Contemplation}. %{\color{red}PRESENT AS MORE NEGATIVE }
%
%{\color{red}There is some Alex Imas paper on deliberation in intertemporal choices. Maybe have a look at it and their cites?}
Discussions may change people's hiring choices by making them think more carefully about their choices, or by allowing them to override an automatic discriminatory response \citep{devineStereotypesPrejudiceTheir1989, devineRoleDiscrepancyAssociatedAffect1993, plantActiveControlPrejudice2009, devineLongtermReductionImplicit2012}.
%or increasing the salience of some aspects of the choice because people deliberate more. As an example of this mechanism, work in psychology suggests that discrimination can be a habitual or automatic response , and only when people reflect on their choices do they override this automatic response and discriminate less. There are four suggestive pieces of evidence in favor of such a mechanism. 
%
There is some evidence for such increased contemplation.
%First, participants who take longer to respond to choices involving a transgender worker during the treatment round are more likely to select them
%(\autoref{tab_durations_corr}, column 1). 
%
Discussion participants take on average \input{../../outputs/stats/effect_duration_discussion.tex} seconds (\input{../../outputs/stats/effect_duration_discussion_perc.tex}) longer in the individual outcome-round choices, and are \textit{less} likely to select a dominated option in the outcome round ($p$=\input{../../outputs/stats/p_val_dominated_mf_r2.tex}, \autoref{tab_dominates}, column 1), suggesting that they are more attentive. However, it is unlikely that this drives the treatment effects on discrimination, since longer response times are not correlated with being more likely to select a transgender worker ($p$=\input{../../outputs/stats/p_val_corr_duration_choice.tex}).

% Finally, participants correctly recall the choices that were made in the first round significantly more often when there was a discussion compared to when they are asked to recall their own individual choices (\autoref{fig_mem_check}). 
%
%  
%
%
%%in line with the claim that participants pay more attention to their choices after taking part in a discussion.
%
%
%
%Plausibly, going through an extensive discussion and explicit reasoning process about who to hire makes people take longer and act more deliberately during their individual choices.\footnote{Relatedly, participants take significantly longer to make each choice \textit{during} the discussion (i.e., during the treatment round), with a mean duration of over 60 seconds per choice as compared to \input{../../outputs/stats/mean_duration_control.tex} seconds in the \textit{No discussion (private)} arm (\autoref{fig_durations}, panel A). 
%However, this alone does not seem to drive the effects on the later outcome round choices. Even though participants in the \textit{No discussion (public)} round take significantly longer in the treatment round to make choices, this does not translate to increased response time in the outcome round ($p$-value: \input{../../outputs/stats/p_val_duration_choosing_only.tex}) and this treatment arm did not see major reductions in discrimination.} 
% This may allow them to override any automatic negative responses they feel towards transgender people. 
 
 
 
%In line with the claim that participants pay more attention to their choices after taking part in a discussion, participants are also .
%
%It does seems that participants' choices during the discussion itself are subject to more attention than their individual choices. 


% And participants correctly recall the choices that were made in the first round significantly more often when there was a discussion compared to when they are asked to recall their own individual choices (\autoref{fig_mem_check}). 


%\item \autoref{fig_mem_check} - people remember discussion choices better than their own earlier choices and announcements, so there may be a role for deliberation / attention. Link to lit on narratives/stories being more memorable (Roth et al)












% - talk about timing, paying more attention etc. Automatic discrimination, but when having to give reasons it's harder to discriminate

%Discussion makes people pay more attention - less likely to choose a dominated option for M/F - better at picking better options










%\textbf{Effect of group characteristics / composition} - not much heterogeneity, possibly closer groups lead to greater effect

%  \autoref{tab_het_demo} - very little heterogeneity by demographic charactersitics
%   \autoref{tab_het_group} - closer groups persuade each other more?







%	\begin{table}[htbp]
%
%	\centering
%	\caption{Treatment round choices}
%	\centering
%\footnotesize \textbf{Panel A:} 3-person discussion sample (Phases 1 and 2)	
%\vspace{-1em}
%
%	\normalsize
%		\label{tab_r1_main}
%		\begin{subtable}[t]{1\textwidth}
%\centering
%\resizebox{0.8\textwidth}{!}{
%\input{../../outputs/tables/r1_main.tex}
%}
%		\end{subtable}
%	\vspace{1em}
%	
%		\centering
%\footnotesize \textbf{Panel B:} Phase 2 Sample (Listeners excluded)
%\vspace{-1em}
%
%	\normalsize
%		\begin{subtable}[t]{0.8\textwidth}
%	\resizebox{\textwidth}{!}{
%		\input{../../outputs/tables/r1_phase_2.tex}
%	}
%		\end{subtable}
%	\footnotesize
%%\flushright \textit{Continued on next page...}
%\end{table}







%\subsection{Mechanism outcomes}
%\label{sec_mech_outcomes}


%{\color{red}Talk about group-predic being overestimate for discussion speakers, but not for listeners... $p$-value of the effect being different for speakers (pooled across 2- and 3- and listeners is \input{../../outputs/stats/p_val_group_predic_listener_vs_speakers.tex}. Reference the \autoref{fig_group_predic_phase2}. Discussion speakers (pooling 2-person and 3-person discussions) are predicted to choose transgender workers \input{../../outputs/stats/diff_group_predic_listener_vs_speakers.tex} percentage points more than discussion listeners ($p$=\input{../../outputs/stats/p_val_group_predic_listener_vs_speakers.tex}).}





%{\color{red}\textbf{TALK ABOUT BOOTSTRAPPING / discussion dynamics - increase in perceptions leads to more pro-trans behavior leads to increase in perceptions etc.}}




% \textit{Predicted choices (community)}. In column (1) of \autoref{tab_norms}, I show that the discussion did not have a significant effect on participants' predictions of the proportion of others in the study who picked the transgender worker in that third pair.

%\textit{Predicted choices (own group)}.  Column (2) of \autoref{tab_norms} shows that the discussion increased the probability of predicting that a neighbor would select a transgender individual by 18.8 percentage points (45\%). \autoref{fig_group_predic} also shows that on average in \textit{both} the control and the discussion group there are no misperceptions about the likelihood of choosing a transgender worker. People predict that others will select transgender people about 19 percentage points more after the discussion, and people indeed select transgender people more by approximately the same amount (16 percentage points).


%\textbf{Attitudes and beliefs}. In \autoref{tab_attitudes} I show how the discussion affects measures of individuals' attitudes towards and beliefs about transgender people. 
%
%\textit{List experiment}. In order to examine whether individuals' private attitudes towards transgender people outside the sphere of hiring were changed by the group discussion, we used a double list experiment \citep{droitcour2004item, glynnWhatCanWe2013}. It measured the proportion of people who agreed with the statement ``In general, if I see a transgender person, I walk away.''. Since the list experiment does not allow the enumerator or the researcher to infer \textit{which} statements the participant agrees with, it preserves the anonymity of their responses and so is less likely to be vulnerable to social desirability concerns than standard self-reported attitude questions. Enumerators read out two lists (A and B) that contained 6 non-sensitive statements about a participant's preferences. The statement about transgender people was randomly added to either list A or list B. Whether list A or list B was read first was also randomized. For each list, enumerators asked how many statements in the list they agreed with. Column (1) of \autoref{tab_attitudes} shows that the discussion does not have a significant effect on the proportion of people who agree with the anti-trans statement.
%
%\textit{Disapproval of discrimination}. 
%Participants were presented with two scenarios depicting instances of discrimination against transgender individuals and were asked to evaluate whether the discriminator's actions were acceptable or wrong. The first scenario involved an employer rejecting a transgender applicant for employment, while the second scenario involved a woman avoiding a transgender individual on the street. Column (2) of \autoref{tab_attitudes} shows that the discriminator's behavior was seen as wrong in 93\% of cases in the control group, and this did not change significantly in the discussion group.
%
%\textit{Perceived reliability}.  To measure whether there were changes in the perceived reliability of transgender workers as a result of the discussion, participants were asked to say how likely they think a certain worker was to complete a delivery if they were selected to deliver. Participants were shown two photos of workers previously shown during the hiring process: one worker was transgender, and the other was male. Column (3) of \autoref{tab_attitudes} shows that participants are 7 percentage points less likely to say that the transgender worker was likely or very likely to complete the delivery, but that this is not significantly affected by the discussion.
%
%
%\textbf{Norms}. In \autoref{tab_norms}, I evaluate how the discussion affects norms of behavior by testing how it affects participants' predictions of the choices of others in the study.
%
%\textit{Predicted choices (community)}. Participants first made incentivized predictions of the choices of others in the study whom they did not know. They were shown 3 pairs of delivery options, and truthfully told that 20 other people in the study had been shown those pairs. They had to predict how many of those 20 picked each option.  If they made the closest guess on average across all 3 pairs, they were entered into a lottery to win 3000 Rs.' worth of additional items. 2 of the 3 pairs were male-to-male comparisons; the third pair compared a male and a transgender. In column (1) of \autoref{tab_norms}, I show that the discussion did not have a significant effect on participants' predictions of the proportion of others in the study who picked the transgender worker in that third pair.
%
%
%\textit{Predicted choices (own group)}. Participants then made incentivized predictions of the hiring choices of the other two people in their group. For each of the other two group members, they were asked to predict which option they chose for two pairs of delivery options. For each other person, one choice-pair compared a male and a male, and another compared a male and a transgender. If they correctly guessed all 4 combinations they were entered into a second lottery to win a separate prize, also worth 3000 Rs. Column (2) of \autoref{tab_norms} shows that the discussion increased the probability of predicting that a neighbor would select a transgender individual by 18.8 percentage points (45\%). \autoref{fig_group_predic} also shows that on average in \textit{both} the control and the discussion group there are no misperceptions about the likelihood of choosing a transgender worker. People predict that others will select transgender people about 19 percentage points more after the discussion, and people indeed select transgender people more by approximately the same amount (16 percentage points).
		





%\subsection{Alternative mechanisms}


%\textbf{Confounders}. In \autoref{tab_confound} and \autoref{tab_confound_interact} I examine possible confounders.

%{\color{red}WRITE THESE RESULTS BETTER}



%We included two  recall tasks in which participants have to restate as many items as possible from a list of items, one of which includes the word "transgender". The probability of recalling the word transgender, conditional on the number of other items recalled, is used as a measure of the salience of the idea of being transgender.



%Participants were read two lists containing a mix of words mostly related to deliveries, everyday objects, and identity. After each list was read out once by the enumerator, participants were asked to repeat as many words as they could from the list. 

%The measure of salience is whether they recalled the word "transgender", conditional on the total number of other words they recalled. 

%Column (1) of \autoref{tab_confound} shows that this measure of salience is not significantly affected by the discussion.


\textbf{Experimenter demand effects}. If participants wanted to please the surveyors or researchers, then those who correctly guessed the purpose of the study may have discriminated less against transgender workers \citep{dequidtMeasuringBoundingExperimenter2018}. 
%For this to undermine the treatment effect estimates, participants would have to guess the purpose of the experiment more often in the discussion groups, and this would result in a positive interaction between those who correctly guessed the purpose and the discussion treatment.  
To measure this, I asked respondents to report their beliefs about the purpose of the study twice during the main survey (immediately after the hiring choices, and again at the very end of the session) using an open-ended question. I classify people as having correctly guessed the study's purpose if they mentioned transgender people.
%said it was to measure preferences for hiring transgender individuals, or otherwise mentioned transgender people.
I find no evidence that experimenter demand effects confound the main treatment effects. \input{../../outputs/stats/purpose_0_mean.tex} of participants correctly guess the purpose of the study after the main hiring round, and \input{../../outputs/stats/purpose_2_mean.tex} correctly guess it by the end of the survey.  However, discussion participants are no more likely to guess the purpose of the study at either stage than the control participants (\autoref{tab_confound}, columns 2 and 3), and in fact are more likely to think that the study is about delivery preferences (\autoref{fig_perceived_purpose}).\footnote{This does not mean participants were unaware that they were discussing transgender workers with their group members, only that they did not think that this was the purpose of the study.} Moreover, there is no detectable difference in the treatment effects for people who do and do not correctly guess the study purpose (\autoref{tab_confound_interact}, columns 1 and 2).
%\footnote{When examining all categories of responses that participants gave, there are significant differences in the perceived purpose of the study across arms (\autoref{fig_perceived_purpose}). However, since the probability of \textit{correctly} guessing the purpose remains unchanged across treatment arms, it is unlikely that these differences could drive the large observed changes in the willingness to select transgender workers.}
 
 
%\autoref{fig_perceived_purpose}
 
  While the rights videos did increase the likelihood of a participant correctly guessing the purpose of the experiment from 5\% to 10\% ($p$$<$0.001), those who correctly guessed did not drive the reductions in discrimination seen in the discussion groups (\autoref{tab_video_confounder_interact}, columns 1 and 2). These tests do not fully rule out \textit{subconscious} demand effects, but the \textit{Legal rights video} likely represents the upper bound on such demand effects, and has a substantially smaller treatment effect than the \textit{3-person discussion}, suggesting that the discussion's effects are not driven by experimenter demand.

%\textcolor{red}{Add fig/table on which were the perceived purposes of the experiment... (from reviewer)}

%\textcolor{red}{Add discussion on whether persausion is happening ``subconsciously''}

% REVIEWER COMMENT:
%A standard concern about experiments is experimenter-demand effects, so it is encouraging that only around 10% of participants seem to correctly guess the survey's purpose. However, this number strikes me as surprisingly low, given that the author notes many of the discussions are "dominated" by pro-trans speakers. In fact, the 10% number is made even more shocking when juxtaposted with the very high persuasion rate implied by the authors' estimates. People are somehow being persuaded to a massive degree without even realizing what they are being persuaded about. It would be useful to know exactly how the question about the survey's purpose was asked, whether everyone gave a non-trivial response, etc. And to have a discussion in the paper on if/how it is plausible to have so much (subconscious?) persuasion going on.




\textbf{Social desirability bias}. To measure a participant's propensity to give socially desirable answers, at baseline I elicited a shortened version of the \citet{crowne1960marlowe} module, which has been used elsewhere in India for a similar purpose \citep{dharReshapingAdolescentsGender2022a}. The questions ask whether the respondent has a number of ``too good to be true'' traits (see Appendix \ref{sec_data_sdb}). I find no evidence that the results are driven by a participant's desire to give socially desirable answers to the enumerator. The treatment effects of the discussion and the rights videos are not significantly larger for individuals with an above-median social desirability score (\autoref{tab_confound_interact}, column 3; \autoref{tab_video_confounder_interact}, column 3).


\textbf{Salience}. Simply increasing the salience of the idea of being transgender does not appear to be the key driver of the treatment effects. To measure this, I included a recall task in which participants have to restate as many items as possible from two lists of items, one of which includes the word ``transgender''. The probability of recalling the word transgender, conditional on the number of other items recalled, is used to measure the salience of the idea of being transgender. Salience actually decreases in the 3-person discussion arm (\autoref{tab_confound}, column 1), and the effect on discrimination is not significantly stronger for participants who remembered the word transgender (\autoref{tab_confound_interact}, column 4).%\footnote{Similarly, \autoref{fig_mem_check_trans_non_trans} shows that participants do not appear to be significantly more likely to correctly recall choices that involved transgender workers (compared to non-transgender workers) from earlier rounds across all treatment arms ($p$=\input{../../outputs/stats/p_val_mem_check_diff_trans.tex}), suggesting that they are not significantly more salient than other choices.} The effects of the rights videos also do not seem to be driven by increased salience, since the videos do not have a significant effect on the probability of remembering the word ``transgender" (\autoref{tab_video_confounders}).






%The treatment effect of the videos is attenuated among individuals with a below-median social-desirability score (), raising concerns that socially desirable responses are driving the treatment effect. I cannot rule out these concerns, but they seem unlikely to be driving the full effect of the legal rights video. The legal rights video also led to strong reductions in the private grocery pick-up choices, which were hidden from the enumerator (\autoref{tab_videos_anon_choices}). In addition, if the effect of the law is driven by a desire to conform with a prevailing social norm, we might expect the results to be driven by those with a high social desirability score anyway.





\textbf{Cheap talk}. To examine the robustness of the results to variation in the stakes, for a subsample of 582 individuals in phase 1 of data collection, I cross-randomized whether the participants were (truthfully) told that they would receive 1 delivery (N=288) or 3 deliveries (N=294) from the \textit{same} worker. If the results were driven by experimenter demand effects, or by social image benefits that outweigh the cost of a \textit{single} interaction with a trans worker, then receiving 3 deliveries would reduce the treatment effect of the discussion. 
While the people who are offered 3 deliveries discriminate more on average, the reduction in discrimination due to the discussion is still large and robust in the 3-delivery case (\autoref{tab_high_stakes}, 14 p.p., $p$=0.013), and the interaction between the treatment effect and the number of deliveries is close to 0 and insignificant ($p$=\input{../../outputs/stats/high_stakes_p_val.tex}).
%To test whether the results are driven by low stakes decisions, for a subsample of 582 individuals, I cross-randomized whether the participants were told that they would receive 1 delivery or 3 deliveries from the same worker (see Section \ref{sec_high_stakes}). \autoref{} shows that 
%
 The main effects of the discussion are therefore unlikely to be driven by the relatively low stakes of a single interaction. 
 %The evidence on the rights videos is more mixed: the point estimate suggests that the effect of the videos is smaller when participants are offered 3 deliveries, although I cannot detect a significant difference (\autoref{tab_high_stakes_video}). The effects of informing people about transgender rights may therefore be attenuated in higher-stakes situations.


%{\color{red}To examine the robustness of the results to variation in the stakes, for a subsample of 582 individuals in phase 1 of data collection, I cross-randomized whether the participants were (truthfully) told that they would receive 1 delivery (N=288) or 3 deliveries (N=294) from the same worker. 

%The latter group were told that, based on their choices and the random selection in the hiring elicitation, they would receive 3 deliveries from the \textit{same} worker each time, and that the items they received each time would be of the same value as the items they selected. If participants choose transgender workers purely out of a desire to please the experimenter or to look good in front of their neighbors, while also bearing a utility cost each time they have to interact with a transgender person, then the participants offered 3 deliveries would discriminate more. And if experimenter demand effects drove the effect of discussions, we would expect the treatment effect to be smaller in the 3-delivery group.
%}

%\textbf{Other mechanisms underlying the discussion}. Several other features of the process differ between participants involved in a discussion and those that select individually.\footnote{These differences include: (i) the group setting, which may affect participants' mood or pro-sociality, or give them a sense of ``safety in numbers''; (ii) the icebreaker discussion, which may relax them or make them less suspicious; (iii) being shown the worker profiles on paper sheets instead of on the enumerator's tablet; and (iv) the longer delay between the treatment round and the outcome round, resulting from the transition from a common group space to a private space.} 
%\textbf{ADD DATA ON TIME - duration}
%While these features are intuitively minor, there is a concern that they may drive the observed differences in outcomes between the discussion groups and the \textit{No discussion (private)} arm.
% However, these features cannot explain the entire effects of the discussions, because they are also shared by the  \textit{non-observers} in the \textit{No discussion (public)} arm, who are not significantly more likely to select a transgender worker in the outcome round. %In other words, the discussion must have a large effect on discrimination above and beyond the effects of the group setting, the icebreaker, the paper sheets, and the time-gap between treatment and outcome elicitation.

\textbf{Facilitator influence}. One concern about the experimental design is that the way the communication was structured---mediated through a facilitator---may have inhibited anti-trans expression and driven the positive treatment effects. While it is challenging to test this directly (the enumerator who facilitates the discussion is always present), the pattern of heterogeneity does not support this interpretation. Using the number of words said by facilitators in the discussion as a proxy for how much influence they exert, I find that when facilitators influence the discussions about transgender workers more, this is associated with participants being \textit{less} likely to choose a transgender worker (\autoref{enumerator_influence}). This correlational pattern appears incompatible with facilitator influence driving the positive effects. Instead, the reduced-form estimate may be a lower bound of the true effect if facilitators endogenously aim to influence groups more in cases where the group appears to be initially more discriminatory.





\begin{table}[!htbp]
\caption{Discussion effect is robust to controlling for other photo characteristics}
\label{tab_photo_rating_survey}
\centering
\resizebox{0.8\textwidth}{!}{
\input{../../outputs/tables/photo_rating_survey_diffs.tex}
}
\begin{tablenotes}
\item \scriptsize	\textit{Notes}: This table shows the effect of the discussion on the probability of choosing the alternative worker when controlling for the \textit{characteristics} of the photos. Photo characteristics were measured using a supplementary online survey (Dec 2023--Jan 2024), in which a sample of 500 new participants reported whether the worker in each photo looked rich, old, from a scheduled caste/tribe, and educated, what the worker's most likely religion was, and whether the worker was neatly dressed, all using 4-option Likert scales. They also rated photos based on whether they would (i) feel comfortable talking to the worker; (ii) feel unsafe if the worker visited their home; (iii) feel worried if the worker spoke to their family; (iv) think that their spouse would be unhappy if they spoke to the worker. Participants were recruited using Facebook advertisements, were 50\% female, and were all current residents of Tamil Nadu. A subset of 30 photos (10 male, 10 female, 10 transgender) were rated. Each photo received between 74 and 98 ratings. Ratings were converted into Z-scores. The explanatory variables used are the \textit{differences} in the Z-scores between the alternative worker and the benchmark worker. The outcome is whether the participant selected the alternative worker in the private outcome round. * p $<$ 0.1, ** p $<$ 0.05, *** p $<$ 0.01.  Standard errors are clustered at the group-of-3 level and are in parentheses. Standard p-values are in brackets. Sample is phase 1 and 2, only No discussion (private) and 3-person discussion arms.
%\textit{High persuasiveness score for discussants}: Above median score for the other two participants in a group on an index of persuasiveness. Index is constructed using a weighted sum of the ratings out of 10 given for the following character traits of other participants: (i) confident; (ii) quiet; (iii) like a leader; (iv) shy; (v) talkative; (vi) admirable; (vii) inspiring. See Section \ref{sec_persuasiveness} for details.
%
%\textit{Close relations with others in group}: Above median score on an index of perceived relationships with other participants in the group (see section \ref{sec_group_relations} for full details). The index is constructed using a weighted sum of (i) whether the other participant is a close family member, (ii) another family member, (iii) a friend, or (iv) simply a neighbor; (v) how long they have known the other participant; (vi) how often they talk to the other participant; (vii) how often they ask the other participant for advice; (viii) how often they ask for recommendations for what to buy; (ix) how often they tell secrets to the other participant. For each participant, I take the mean score of their rating for the two other participants in their group to get a score at the participant level.
%
%\item * p $<$ 0.1, ** p $<$ 0.05, *** p $<$ 0.01.  Standard errors are clustered at the group-of-3 level and are in parentheses. Standard p-values are in brackets. Unit of observation is the participant $\times$ choice level. Sample in columns 1-2 includes only \textit{No discussion (private)} and \textit{listeners} in the \textit{2-person discussion} arm. Sample in columns 3-4 includes only \textit{No discussion (private)} and \textit{3-person discussion} arms. Only phase 2 of data collection is included (when group relationships were elicited). The outcome is whether the transgender worker was selected in the private outcome round, restricting analysis to only choices that include a transgender worker. Additional controls include stratum fixed effects; dummies for the rights videos; whether the individual was randomized into being offered 3 deliveries or 1 delivery, or was not part of this randomization; whether the alternative worker was shown on the right; phase fixed effects; relative \# items offered; relative reliability score; whether the reliability score was shown.
%\item \textit{High persuasiveness score for discussants} is defined based on the index of persuasiveness seen in Appendix \ref{sec_persuasiveness}. The variable is 1 if the average score based on the listener's responses about the two discussants is higher than the average score based on the two discussants' responses about the listener. Intuitively, this describes whether the random selection of discussants selects people who are relatively more persuasive.
%\item \textit{Close relations with others in group} is defined based on the index of the closeness of relationships between participants in a group, seen in Appendix \ref{sec_group_relations}. For each participant, I use their responses to calculate relationship scores for the other two in their group. I take the average of these two scores. The variable used is 1 if this average is greater than the sample median, and 0 otherwise.

% when the participant rated others in their group to have average score that individual based on the index defined in Appendix \ref{sec_group_relations}.
%\item Column (1): the dependent variable is the number of statements the participant agreed with on each of the lists of statements shown to them. Each participant sees both List A and List B, and the anti-trans statement ("In general, if I see a transgender person, I walk away") is randomly included in either List A or List B.
%\item Column (2): the enumerator describes two scenarios, one in which an employer rejects a transgender individual, and another in which a woman avoids a transgender individual on the street. The dependent variable is whether the participant says the person's actions are "wrong".
%\item Column (3): the dependent variable is whether the participant says a worker is likely or very likely to complete a delivery after being shown a photo of the worker.
\end{tablenotes}
\end{table}



\begin{table}[!htbp]
\caption{Effect of discussion on private grocery pick-up choices (phase 2 only)}
\label{tab_anon}
\resizebox{\textwidth}{!}{
\input{../../outputs/tables/anon_choices.tex}
	}
	\begin{tablenotes}

\item \footnotesize	\textit{Notes}:  * p $<$ 0.1, ** p $<$ 0.05, *** p $<$ 0.01. Standard errors are clustered at the group-of-3 level and are in parentheses. Standard p-values are in brackets. Unit of observation is the participant $\times$ choice level. Sample includes all participants in phase 2.  Column (3) only includes choices that involved a transgender worker. Participants saw 4 options, and were asked which worker they would prefer to organize a private grocery pick-up with. Neither the enumerator nor a participant's group members knew what they selected. In columns (1) and (2), the outcome is whether the \textit{alternative worker} (rather than the male \textit{benchmark worker}) was selected in the private grocery pick-up round. In column (3), it is whether the transgender worker was selected. $\textit{Worker is trans} = 1$ when the alternative worker is transgender, and is 0 when the alternative worker is male or female. 
%The mean of the dependent variable when the worker is trans and in the \textit{No discussion (private)} arm indicates that the transgender worker was selected (rather than the male benchmark worker) \input{../../outputs/stats/p_choose_trans_control.tex} of the time. The mean when the worker is male or female in the \textit{No discussion (private)} arm is above 50\% because participants on average prefer female alternative workers to the male benchmark workers.
The specification used is seen in equation \ref{eqn_main_spec}.  Controls include stratum fixed effects; dummies for the discussion-arm treatments; whether the alternative worker was shown on the right; phase fixed effects; and the controls selected by double LASSO (see Section \ref{sec_lasso}). In column (2), controls are interacted with \textit{Worker is trans}, so the coefficient on \textit{Worker is trans} is not shown.
\end{tablenotes}
\end{table}





	\begin{table}[!htbp]
\caption{Discussion participants are not more likely to guess purpose of the experiment and are less likely to remember the word ``transgender''}
\label{tab_confound}
\resizebox{\textwidth}{!}{
\input{../../outputs/tables/confounders.tex}
}
\begin{tablenotes}
\item	\footnotesize \textit{Notes}: * p $<$ 0.1, ** p $<$ 0.05, *** p $<$ 0.01. Standard errors are clustered at the group-of-3 level and are in parentheses. p-values are in brackets, and use randomization inference for the \textit{3-person discussion} coefficients. Unit of observation is the participant level. Sample includes the \textit{3-person discussion} arm and the \textit{No discussion (private)} arm. Column (1) includes only phase 1, since the salience module was only included in phase 1. Columns (2) and (3) include both phases 1 and 2. \textit{Column (1)}. Participants were read two lists of words, described in Section \ref{sec_data_salience}, and were asked to recall as many of the words as possible. Outcome is whether the participant remembered the word transgender. I control for the proportion of other words remembered. \textit{Columns (2) and (3)}. Participants were asked what they thought the purpose of the study was twice: once after the main outcome round (column 2), and again at the end of the session (column 3). I classify people as having correctly guessed the study's purpose if they say it is to measure preferences for hiring transgender individuals. Outcome is whether they correctly guessed the purpose of the study.
\item Additional controls include: stratum fixed effects; phase fixed effects (for columns 2 and 3 only); dummies for rights videos; and controls selected by double LASSO (see Section \ref{sec_lasso}). 
\end{tablenotes}
\end{table}



\begin{figure}[!htbp]
		
		\centering
		\caption{Perceived purpose of the experiment}
		\resizebox{\textwidth}{!}{
			\includegraphics[width=\linewidth]{../../outputs/figs/purpose_barchart.pdf}
			}
		\label{fig_perceived_purpose}
		\begin{tablenotes}
			\footnotesize
			\item  \textit{Notes}: Unit of observation is the participant level. Participants are asked what they believe the purpose of the study is twice: once immediately after the main hiring outcome round, and again at the end of the survey. Outcome on the y-axis is whether the participant cited the reason. Confidence intervals are based on standard errors that are clustered at the group-of-3 level. To test whether the composition of perceived purposes changes, I regress the treatment status on indicator variables for each of the perceived purposes. The joint F stat for the coefficient on all the indicator variables is \input{../../outputs/stats/f_purpose_0} ($p$=\input{../../outputs/stats/f_purpose_0_p}) for after the outcome round, and  \input{../../outputs/stats/f_purpose_2} ($p$=\input{../../outputs/stats/f_purpose_2_p}) for the end of the survey.
%			\item F-stat of test for coefficient of 0 on all perceived purposes when 
			%The outcome is whether the participants selected the comparator in the treatment round of their hiring choices.
		\end{tablenotes}
	\end{figure}



	\begin{table}[!htbp]
\caption{Treatment effect is not driven by people who correctly guessed the purpose of the experiment, people with high social desirability scores, or people for whom ``transgender'' was salient}
\label{tab_confound_interact}
\resizebox{\textwidth}{!}{
\input{../../outputs/tables/confounder_interactions.tex}
}
\begin{tablenotes}
			\scriptsize
%			\item  \textit{Notes}: * p $<$ 0.1, ** p $<$ 0.05, *** p $<$ 0.01. 
%			\item \textbf{Only choice-pairs from the outcome round that include a transgender photo are included. The dependent variable in all columns is whether the transgender person was selected. A participant correctly guessed the purpose if they said that the purpose of the study includes measuring preferences for hiring transgender people. }
%			\item 
			\item \textit{Notes}: * p $<$ 0.1, ** p $<$ 0.05, *** p $<$ 0.01. Standard errors are clustered at the group-of-3 level and are in parentheses. p-values are in brackets, and use randomization inference for the \textit{3-person discussion} coefficients. Unit of observation is the participant $\times$ choice level. Sample includes the \textit{3-person discussion} arm and the \textit{No discussion (private)} arm. Columns (1) and (2) include both phases 1 and 2. Columns (3) and (4) include only phase 1, when the SDB and salience modules were included. Only choices that include a transgender worker are included. The outcome is whether the participant chose the transgender worker in the private outcome round. \textit{Columns (1) and (2)}. Participants were asked what they thought the purpose of the study was twice: once after the main outcome round (column 1), and again at the end of the session (column 2). I classify people as having correctly guessed the study's purpose if they say it is to measure preferences for hiring transgender individuals. \textit{Column (3)}. SDB score is the social desirability score based on the  \citet{crowne1960marlowe} index, described in  Section \ref{sec_data_sdb}. \textit{Column (4)}. Participants were read two lists of words, described in Section \ref{sec_data_salience}, and were asked to recall as many of the words as possible. \textit{Transgender word remembered} indicates that the participant recalled the word ``transgender''. \textit{Above median proportion of non-trans word remembered} indicates that the participant remembered more than 9 out of 17 of the other words in the two lists. Additional controls in all columns include: stratum fixed effects; phase fixed effects (for columns 1 and 2 only); dummies for rights videos; and controls selected by double LASSO (see Section \ref{sec_lasso}). 
		\end{tablenotes}
\end{table}


\begin{table}[!htbp]
	\caption{No significant differences in effect of the rights videos for participants who correctly guess the purpose}
	\label{tab_video_confounder_interact}
	\resizebox{\textwidth}{!}{
		\input{../../outputs/tables/confounder_interactions_video.tex}
	}
	\begin{tablenotes}
		\item \scriptsize	\textit{Notes}: * p $<$ 0.1, ** p $<$ 0.05, *** p $<$ 0.01. Standard errors are clustered at the group-of-3 level and are in parentheses. p-values are in brackets, and use randomization inference for the \textit{3-person discussion} coefficients. Unit of observation is the participant $\times$ choice level. Sample includes the \textit{3-person discussion} arm and the \textit{No discussion (private)} arm. Columns (1) and (2) include both phases 1 and 2. Columns (3) and (4) include only phase 1, when the SDB and salience modules were included. Only choices that include a transgender worker are included. The outcome is whether the participant chose the transgender worker in the private outcome round. \textit{Columns (1) and (2)}. Participants were asked what they thought the purpose of the study was twice: once after the main outcome round (column 1), and again at the end of the session (column 2). I classify people as having correctly guessed the study's purpose if they say it is to measure preferences for hiring transgender individuals. %\textit{Column (3)}. SDB score is the social desirability score based on the  \citet{crowne1960marlowe} index, described in  Section \ref{sec_data_sdb}. \textit{Column (4)}. Participants were read two lists of words, described in Section \ref{sec_data_salience}, and were asked to recall as many of the words as possible. \textit{Transgender word remembered} indicates that the participant recalled the word "transgender". \textit{Above median proportion of non-trans word remembered} indicates that the participant remembered more than 9 out of 17 of the other words in the two lists. 
Additional controls in all columns include: stratum fixed effects; phase fixed effects (for columns 1 and 2 only); dummies for rights videos; relative \# of items offered; relative reliability score; a dummy for whether the reliability score is shown; and controls selected by double LASSO (see Section \ref{sec_lasso}).
		%\item Column (1): the dependent variable is the number of statements the participant agreed with on each of the lists of statements shown to them. Each participant sees both List A and List B, and the anti-trans statement ("In general, if I see a transgender person, I walk away") is randomly included in either List A or List B.
		%\item Column (2): the enumerator describes two scenarios, one in which an employer rejects a transgender individual, and another in which a woman avoids a transgender individual on the street. The dependent variable is whether the participant says the person's actions are "wrong".
		%\item Column (3): the dependent variable is whether the participant says a worker is likely or very likely to complete a delivery after being shown a photo of the worker.
	\end{tablenotes}
\end{table}


\begin{table}[!htbp]
\caption{Discussion effect is robust to increasing the stakes by offering 3 deliveries from the same worker}
\label{tab_high_stakes}
\resizebox{\textwidth}{!}{
\input{../../outputs/tables/high_stakes.tex}
	} 
	
	\begin{tablenotes}
\item	\footnotesize \textit{Notes}: * p $<$ 0.1, ** p $<$ 0.05, *** p $<$ 0.01. Standard errors are clustered at the group-of-3 level and are in parentheses. Randomization inference p-values are in brackets. Unit of observation is the participant $\times$ choice level. Sample includes only the subsample of \input{../../outputs/stats/n_high_stakes_sample.tex} individuals in phase 1 who were randomized into either receiving 1 delivery (N=\input{../../outputs/stats/n_low_stakes.tex}) or 3 deliveries (N=\input{../../outputs/stats/n_high_stakes.tex}). Participants who were offered 3 deliveries were (truthfully) told that they would receive 3 deliveries from the same worker, giving items of the same value each time. Phase 1 only included the \textit{3-person discussion} arm and the \textit{No discussion (private)} arm. Column (3) only includes choices that involved a transgender worker. 
In columns (1) and (2), the outcome is whether the \textit{alternative worker} (rather than the male \textit{benchmark worker}) is chosen in the private choices in the \textit{outcome round}. In column (3), it is whether the transgender worker was selected. $\textit{Worker is trans} = 1$ when the alternative worker is transgender, and is 0 when the alternative worker is male or female. The specification used is seen in equation \ref{eqn_main_spec}.  Controls include stratum fixed effects; dummies for the rights videos; whether the alternative worker was shown on the right; and the controls selected by double LASSO (see Section \ref{sec_lasso}). In column (2), controls are interacted with \textit{Worker is trans}, so the coefficient on \textit{Worker is trans} is not shown. Columns (2) and (3) also include controls for the relative \# items offered by the alternative worker, the relative reliability score of the worker, and a dummy for whether the reliability score was shown.
\end{tablenotes}
\end{table}




\begin{table}[!htbp]
\caption{Facilitator influence: more input from discussion facilitator about transgender choices is not associated with choosing transgender workers}
\label{enumerator_influence}
\centering
\resizebox{0.8\textwidth}{!}{
\input{../../outputs/tables/enumerator_influence.tex}
}
\begin{tablenotes}
\item \scriptsize	\textit{Notes}: Sample only includes 3-person discussion participants. Word counts come from discussion transcript data. \textit{\# words} measures are converted to Z-scores so they have a mean of 0 and a standard deviation of 1. \textit{Prop. words from facilitator} is the number of words from the facilitator as a proportion of words from both facilitator and respondents. Facilitators say \input{../../outputs/stats/discussion_length_words_lead_trans.tex} words on average about trans choices (\input{../../outputs/stats/discussion_ratio_lead_trans.tex} of total words across facilitators and respondents); and \input{../../outputs/stats/discussion_length_words_lead_non_trans.tex} words about non-trans choices (\input{../../outputs/stats/discussion_ratio_lead_non_trans.tex} of total). Outcome and controls are the same as \autoref{tab_main}, column 3. 



%\input{../../outputs/stats/discussion_length_words_trans.tex}

%\input{../../outputs/stats/discussion_length_words_non_trans.tex}







%This table shows the effect of the discussion on the probability of choosing the alternative worker when controlling for the \textit{characteristics} of the photos. Photo characteristics were measured using a supplementary online survey (Dec 2023--Jan 2024), in which a sample of 500 new participants reported their perceptions of whether worker photos looked like they were rich, old, from a scheduled caste/tribe, educated, their most likely religion, and whether they were neatly dressed, all using 4-option Likert scales. They also rated photos based on whether they would (i) feel comfortable talking to the worker; (ii) feel unsafe if the worker visited their home; (iii) feel worried if the worker spoke to their family; (iv) think that their spouse would be unhappy if they spoke to the worker. Participants were recruited using Facebook advertisements, were 50\% female, and were all current residents of Tamil Nadu. A subset of 30 photos (10 male, 10 female, 10 transgender) were rated. Each photo received between 74 and 98 ratings. Ratings were converted into Z-scores. The explanatory variables used are the \textit{differences} in the Z-scores between the alternative worker and the benchmark worker. The outcome is whether the participant selected the alternative worker in the private outcome round. * p $<$ 0.1, ** p $<$ 0.05, *** p $<$ 0.01.  Standard errors are clustered at the group-of-3 level and are in parentheses. Standard p-values are in brackets. Sample is phase 1 and 2, only No discussion (private) and 3-person discussion arms.
\end{tablenotes}
\end{table}

	




		
\end{document}

